judgeval 0.1.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +202 -0
- package/README.md +340 -0
- package/dist/clients.d.ts +7 -0
- package/dist/clients.js +78 -0
- package/dist/clients.js.map +1 -0
- package/dist/common/integrations/langgraph.d.ts +40 -0
- package/dist/common/integrations/langgraph.js +444 -0
- package/dist/common/integrations/langgraph.js.map +1 -0
- package/dist/common/logger-instance.d.ts +3 -0
- package/dist/common/logger-instance.js +64 -0
- package/dist/common/logger-instance.js.map +1 -0
- package/dist/common/logger.d.ts +54 -0
- package/dist/common/logger.js +221 -0
- package/dist/common/logger.js.map +1 -0
- package/dist/common/tracer.d.ts +205 -0
- package/dist/common/tracer.js +1035 -0
- package/dist/common/tracer.js.map +1 -0
- package/dist/constants.d.ts +51 -0
- package/dist/constants.js +344 -0
- package/dist/constants.js.map +1 -0
- package/dist/data/example.d.ts +70 -0
- package/dist/data/example.js +125 -0
- package/dist/data/example.js.map +1 -0
- package/dist/data/result.d.ts +51 -0
- package/dist/data/result.js +83 -0
- package/dist/data/result.js.map +1 -0
- package/dist/evaluation-run.d.ts +44 -0
- package/dist/evaluation-run.js +136 -0
- package/dist/evaluation-run.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +73 -0
- package/dist/index.js.map +1 -0
- package/dist/judgment-client.d.ts +179 -0
- package/dist/judgment-client.js +1038 -0
- package/dist/judgment-client.js.map +1 -0
- package/dist/rules.d.ts +120 -0
- package/dist/rules.js +322 -0
- package/dist/rules.js.map +1 -0
- package/dist/run-evaluation.d.ts +78 -0
- package/dist/run-evaluation.js +618 -0
- package/dist/run-evaluation.js.map +1 -0
- package/dist/scorers/api-scorer.d.ts +79 -0
- package/dist/scorers/api-scorer.js +291 -0
- package/dist/scorers/api-scorer.js.map +1 -0
- package/dist/scorers/base-scorer.d.ts +100 -0
- package/dist/scorers/base-scorer.js +190 -0
- package/dist/scorers/base-scorer.js.map +1 -0
- package/dist/scorers/exact-match-scorer.d.ts +10 -0
- package/dist/scorers/exact-match-scorer.js +84 -0
- package/dist/scorers/exact-match-scorer.js.map +1 -0
- package/package.json +88 -0
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ExampleBuilder = exports.Example = void 0;
|
|
4
|
+
/**
 * A single evaluation example: the input given to a system together with the
 * outputs, contexts and tool information recorded for it.
 */
class Example {
    /**
     * Copy every field from `options` onto the instance.
     *
     * @param {object} options - Field values for the example. `exampleId` is
     *   optional: when it is missing (or otherwise falsy) a fresh version-4
     *   UUID is generated. All other fields are stored exactly as given.
     */
    constructor(options) {
        this.input = options.input;
        this.actualOutput = options.actualOutput;
        this.expectedOutput = options.expectedOutput;
        this.context = options.context;
        this.retrievalContext = options.retrievalContext;
        this.additionalMetadata = options.additionalMetadata;
        this.toolsCalled = options.toolsCalled;
        this.expectedTools = options.expectedTools;
        this.exampleId = options.exampleId || this.generateUUID();
        this.exampleIndex = options.exampleIndex;
        this.timestamp = options.timestamp;
    }
    /**
     * Generate a version-4 UUID for the example ID.
     *
     * Uses the runtime's `crypto.randomUUID()` when one is available, because
     * `Math.random()` is not a cryptographically strong source. Runtimes
     * without it fall back to the original `Math.random()`-based template
     * generator, so the method never throws for lack of a crypto API.
     *
     * @returns {string} A lowercase UUID of the form
     *   `xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx`.
     */
    generateUUID() {
        if (typeof crypto !== 'undefined' && crypto !== null && typeof crypto.randomUUID === 'function') {
            return crypto.randomUUID();
        }
        return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function (c) {
            const r = Math.random() * 16 | 0;
            // 'y' positions carry the RFC 4122 variant bits (binary 10xx).
            const v = c === 'x' ? r : (r & 0x3 | 0x8);
            return v.toString(16);
        });
    }
    /**
     * Builder pattern for creating an Example.
     *
     * @returns {ExampleBuilder} A new, empty builder.
     */
    static builder() {
        return new ExampleBuilder();
    }
    /**
     * Convert the example to a plain object.
     *
     * @returns {object} An object whose keys are the snake_case counterparts
     *   of the instance fields (for example `actualOutput` becomes
     *   `actual_output`). Unset fields are present with value `undefined`.
     */
    toJSON() {
        return {
            input: this.input,
            actual_output: this.actualOutput,
            expected_output: this.expectedOutput,
            context: this.context,
            retrieval_context: this.retrievalContext,
            additional_metadata: this.additionalMetadata,
            tools_called: this.toolsCalled,
            expected_tools: this.expectedTools,
            example_id: this.exampleId,
            example_index: this.exampleIndex,
            timestamp: this.timestamp,
        };
    }
}
|
|
53
|
+
exports.Example = Example;
|
|
54
|
+
/**
|
|
55
|
+
* Builder for creating Example instances
|
|
56
|
+
*/
|
|
57
|
+
/**
 * Fluent builder that collects Example fields one call at a time.
 * Every setter stores its argument and returns the builder for chaining.
 */
class ExampleBuilder {
    constructor() {
        // Only the input has an initial value; build() rejects it while empty.
        this._input = '';
    }
    /** Set the example input. */
    input(input) {
        this._input = input;
        return this;
    }
    /** Set the actual output. */
    actualOutput(actualOutput) {
        this._actualOutput = actualOutput;
        return this;
    }
    /** Set the expected output. */
    expectedOutput(expectedOutput) {
        this._expectedOutput = expectedOutput;
        return this;
    }
    /** Set the context. */
    context(context) {
        this._context = context;
        return this;
    }
    /** Set the retrieval context. */
    retrievalContext(retrievalContext) {
        this._retrievalContext = retrievalContext;
        return this;
    }
    /** Set the additional metadata. */
    additionalMetadata(additionalMetadata) {
        this._additionalMetadata = additionalMetadata;
        return this;
    }
    /** Set the tools that were called. */
    toolsCalled(toolsCalled) {
        this._toolsCalled = toolsCalled;
        return this;
    }
    /** Set the tools that were expected. */
    expectedTools(expectedTools) {
        this._expectedTools = expectedTools;
        return this;
    }
    /** Set an explicit example ID. */
    exampleId(exampleId) {
        this._exampleId = exampleId;
        return this;
    }
    /** Set the example index. */
    exampleIndex(exampleIndex) {
        this._exampleIndex = exampleIndex;
        return this;
    }
    /** Set the timestamp. */
    timestamp(timestamp) {
        this._timestamp = timestamp;
        return this;
    }
    /**
     * Create the Example from the collected fields.
     *
     * @returns {Example} The constructed example.
     * @throws {Error} When no truthy input has been set.
     */
    build() {
        if (this._input) {
            const collected = {
                input: this._input,
                actualOutput: this._actualOutput,
                expectedOutput: this._expectedOutput,
                context: this._context,
                retrievalContext: this._retrievalContext,
                additionalMetadata: this._additionalMetadata,
                toolsCalled: this._toolsCalled,
                expectedTools: this._expectedTools,
                exampleId: this._exampleId,
                exampleIndex: this._exampleIndex,
                timestamp: this._timestamp,
            };
            return new Example(collected);
        }
        throw new Error('Input is required for an Example');
    }
}
|
|
124
|
+
exports.ExampleBuilder = ExampleBuilder;
|
|
125
|
+
//# sourceMappingURL=example.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"example.js","sourceRoot":"","sources":["../../src/data/example.ts"],"names":[],"mappings":";;;AAiBA,MAAa,OAAO;IAalB,YAAY,OAAuB;QACjC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC3B,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;QACzC,IAAI,CAAC,cAAc,GAAG,OAAO,CAAC,cAAc,CAAC;QAC7C,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC;QAC/B,IAAI,CAAC,gBAAgB,GAAG,OAAO,CAAC,gBAAgB,CAAC;QACjD,IAAI,CAAC,kBAAkB,GAAG,OAAO,CAAC,kBAAkB,CAAC;QACrD,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;QACvC,IAAI,CAAC,aAAa,GAAG,OAAO,CAAC,aAAa,CAAC;QAC3C,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;QAC1D,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;QACzC,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;IACrC,CAAC;IAED;;OAEG;IACK,YAAY;QAClB,OAAO,sCAAsC,CAAC,OAAO,CAAC,OAAO,EAAE,UAAS,CAAC;YACvE,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;YACjC,MAAM,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,GAAG,GAAG,CAAC,CAAC;YAC1C,OAAO,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QACxB,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,OAAO;QACZ,OAAO,IAAI,cAAc,EAAE,CAAC;IAC9B,CAAC;IAED;;OAEG;IACH,MAAM;QACJ,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,aAAa,EAAE,IAAI,CAAC,YAAY;YAChC,eAAe,EAAE,IAAI,CAAC,cAAc;YACpC,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,iBAAiB,EAAE,IAAI,CAAC,gBAAgB;YACxC,mBAAmB,EAAE,IAAI,CAAC,kBAAkB;YAC5C,YAAY,EAAE,IAAI,CAAC,WAAW;YAC9B,cAAc,EAAE,IAAI,CAAC,aAAa;YAClC,UAAU,EAAE,IAAI,CAAC,SAAS;YAC1B,aAAa,EAAE,IAAI,CAAC,YAAY;YAChC,SAAS,EAAE,IAAI,CAAC,SAAS;SAC1B,CAAC;IACJ,CAAC;CACF;AA/DD,0BA+DC;AAED;;GAEG;AACH,MAAa,cAAc;IAA3B;QACU,WAAM,GAAW,EAAE,CAAC;IAsF9B,CAAC;IA1EC,KAAK,CAAC,KAAa;QACjB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACpB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,YAAY,CAAC,YAAoB;QAC/B,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;QAClC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,cAAc,CAAC,cAAsB;QACnC,IAAI,CAAC,eAAe,GAAG,cAAc,CAAC;QACtC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,CAAC,OAAiB;QACvB,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;QACxB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,gBAAgB,CAAC,gBAA0B;QACzC,IAAI,CAAC,iBAAiB,GAAG,gBAAgB,CAAC
;QAC1C,OAAO,IAAI,CAAC;IACd,CAAC;IAED,kBAAkB,CAAC,kBAAuC;QACxD,IAAI,CAAC,mBAAmB,GAAG,kBAAkB,CAAC;QAC9C,OAAO,IAAI,CAAC;IACd,CAAC;IAED,WAAW,CAAC,WAAkB;QAC5B,IAAI,CAAC,YAAY,GAAG,WAAW,CAAC;QAChC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,aAAa,CAAC,aAAoB;QAChC,IAAI,CAAC,cAAc,GAAG,aAAa,CAAC;QACpC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,SAAS,CAAC,SAAiB;QACzB,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,OAAO,IAAI,CAAC;IACd,CAAC;IAED,YAAY,CAAC,YAAoB;QAC/B,IAAI,CAAC,aAAa,GAAG,YAAY,CAAC;QAClC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,SAAS,CAAC,SAAiB;QACzB,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC5B,OAAO,IAAI,CAAC;IACd,CAAC;IAED,KAAK;QACH,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;QACtD,CAAC;QAED,OAAO,IAAI,OAAO,CAAC;YACjB,KAAK,EAAE,IAAI,CAAC,MAAM;YAClB,YAAY,EAAE,IAAI,CAAC,aAAa;YAChC,cAAc,EAAE,IAAI,CAAC,eAAe;YACpC,OAAO,EAAE,IAAI,CAAC,QAAQ;YACtB,gBAAgB,EAAE,IAAI,CAAC,iBAAiB;YACxC,kBAAkB,EAAE,IAAI,CAAC,mBAAmB;YAC5C,WAAW,EAAE,IAAI,CAAC,YAAY;YAC9B,aAAa,EAAE,IAAI,CAAC,cAAc;YAClC,SAAS,EAAE,IAAI,CAAC,UAAU;YAC1B,YAAY,EAAE,IAAI,CAAC,aAAa;YAChC,SAAS,EAAE,IAAI,CAAC,UAAU;SAC3B,CAAC,CAAC;IACL,CAAC;CACF;AAvFD,wCAuFC"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { Example } from './example';
|
|
2
|
+
/**
|
|
3
|
+
* Represents the data for a single scorer
|
|
4
|
+
*/
|
|
5
|
+
export interface ScorerData {
    /** Name of the scorer this record belongs to. */
    name: string;
    /** Score reported by the scorer. */
    score: number;
    /** Threshold associated with the score. */
    threshold: number;
    /** Whether this scorer counts as passed. */
    success: boolean;
    /** Explanation for the result, or null when none was given. */
    reason: string | null;
    /** Strict-mode flag, or null when not reported. */
    strict_mode: boolean | null;
    /** Evaluation model identifier, or null when not reported. */
    evaluation_model: string | null;
    /** Error text for this scorer, or null when there was none. */
    error: string | null;
    /** Evaluation cost, or null when not reported. */
    evaluation_cost: number | null;
    /** Free-form verbose logs, or null. */
    verbose_logs: any | null;
    /** Arbitrary extra key/value data attached to the record. */
    additional_metadata: Record<string, any>;
}
|
|
18
|
+
/**
|
|
19
|
+
* Represents the result of scoring an example
|
|
20
|
+
*/
|
|
21
|
+
export interface ScoringResultOptions {
    /** The example that was scored. */
    dataObject: Example;
    /** Optional per-scorer records for the example. */
    scorersData?: ScorerData[];
    /** Optional error text for the result as a whole. */
    error?: string;
}
|
|
26
|
+
export declare class ScoringResult {
    /** The example that was scored. */
    dataObject: Example;
    /** Per-scorer records, when available. */
    scorersData?: ScorerData[];
    /** Error text for the result as a whole, when set. */
    error?: string;
    /** Create a result from its options object. */
    constructor(options: ScoringResultOptions);
    /**
     * Start building a ScoringResult with the fluent builder.
     */
    static builder(): ScoringResultBuilder;
    /**
     * Produce the plain-object form of this scoring result.
     */
    toJSON(): Record<string, any>;
}
|
|
40
|
+
/**
|
|
41
|
+
* Builder for creating ScoringResult instances
|
|
42
|
+
*/
|
|
43
|
+
export declare class ScoringResultBuilder {
    private _dataObject;
    private _scorersData?;
    private _error?;
    /** Set the scored example; returns the builder for chaining. */
    dataObject(dataObject: Example): ScoringResultBuilder;
    /** Set the per-scorer records; returns the builder for chaining. */
    scorersData(scorersData: ScorerData[]): ScoringResultBuilder;
    /** Set the error text; returns the builder for chaining. */
    error(error: string): ScoringResultBuilder;
    /** Create the ScoringResult from the collected values. */
    build(): ScoringResult;
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ScoringResultBuilder = exports.ScoringResult = void 0;
|
|
4
|
+
/**
 * The outcome of scoring one example: the example itself, the per-scorer
 * records, and an optional error message.
 */
class ScoringResult {
    /**
     * @param {object} options - `dataObject` (the scored example), optional
     *   `scorersData` (array of per-scorer records) and optional `error`.
     */
    constructor(options) {
        this.dataObject = options.dataObject;
        this.scorersData = options.scorersData;
        this.error = options.error;
    }
    /**
     * Builder pattern for creating a ScoringResult.
     *
     * @returns {ScoringResultBuilder} A new, empty builder.
     */
    static builder() {
        return new ScoringResultBuilder();
    }
    /**
     * Convert the scoring result to a plain object.
     *
     * `success` is true only when scorer records exist and every one of them
     * reports success. `data_object` carries the example's own `exampleId`,
     * `exampleIndex` and `timestamp` when they are set; a generated
     * placeholder is used only for values that are missing, so repeated
     * serialization of the same example keeps a stable identity.
     *
     * @returns {object} Plain object with `success`, `scorers_data`,
     *   `data_object` and `error` keys (`null` where the source is absent).
     */
    toJSON() {
        const scorers = this.scorersData;
        const example = this.dataObject;
        // Explicit null/undefined test so valid falsy values (index 0) survive.
        const valueOr = (value, makeFallback) => (value === undefined || value === null ? makeFallback() : value);
        const uniqueSuffix = () => `${Date.now()}-${Math.floor(Math.random() * 1000)}`;
        return {
            success: scorers ? scorers.every(scorer => scorer.success) : false,
            scorers_data: scorers ? scorers.map(scorer => ({
                name: scorer.name,
                threshold: scorer.threshold,
                success: scorer.success,
                score: scorer.score,
                reason: scorer.reason,
                strict_mode: scorer.strict_mode,
                evaluation_model: scorer.evaluation_model,
                error: scorer.error,
                evaluation_cost: scorer.evaluation_cost,
                verbose_logs: scorer.verbose_logs,
                additional_metadata: scorer.additional_metadata
            })) : null,
            data_object: example ? {
                input: example.input,
                actual_output: example.actualOutput,
                expected_output: example.expectedOutput,
                context: example.context,
                retrieval_context: example.retrievalContext,
                additional_metadata: example.additionalMetadata,
                tools_called: example.toolsCalled,
                expected_tools: example.expectedTools,
                name: "example",
                example_id: valueOr(example.exampleId, () => `example-${uniqueSuffix()}`),
                example_index: valueOr(example.exampleIndex, () => 0),
                timestamp: valueOr(example.timestamp, () => new Date().toISOString()),
                // The example carries no trace identifier, so one is generated per call.
                trace_id: `trace-${uniqueSuffix()}`
            } : null,
            error: this.error || null
        };
    }
}
|
|
54
|
+
exports.ScoringResult = ScoringResult;
|
|
55
|
+
/**
|
|
56
|
+
* Builder for creating ScoringResult instances
|
|
57
|
+
*/
|
|
58
|
+
/**
 * Fluent builder that collects the parts of a ScoringResult.
 * Every setter stores its argument and returns the builder for chaining.
 */
class ScoringResultBuilder {
    /** Set the example that was scored. */
    dataObject(dataObject) {
        this._dataObject = dataObject;
        return this;
    }
    /** Set the per-scorer records. */
    scorersData(scorersData) {
        this._scorersData = scorersData;
        return this;
    }
    /** Set the error text. */
    error(error) {
        this._error = error;
        return this;
    }
    /**
     * Create the ScoringResult from the collected values.
     *
     * @returns {ScoringResult} The constructed result.
     * @throws {Error} When no data object has been set.
     */
    build() {
        if (this._dataObject) {
            const collected = {
                dataObject: this._dataObject,
                scorersData: this._scorersData,
                error: this._error,
            };
            return new ScoringResult(collected);
        }
        throw new Error('Data object is required for a ScoringResult');
    }
}
|
|
82
|
+
exports.ScoringResultBuilder = ScoringResultBuilder;
|
|
83
|
+
//# sourceMappingURL=result.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"result.js","sourceRoot":"","sources":["../../src/data/result.ts"],"names":[],"mappings":";;;AA4BA,MAAa,aAAa;IAKxB,YAAY,OAA6B;QACvC,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;QACvC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;IAC7B,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,OAAO;QACZ,OAAO,IAAI,oBAAoB,EAAE,CAAC;IACpC,CAAC;IAED;;OAEG;IACH,MAAM;QACJ,OAAO;YACL,OAAO,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK;YACpF,YAAY,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;gBAC/D,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,SAAS,EAAE,MAAM,CAAC,SAAS;gBAC3B,OAAO,EAAE,MAAM,CAAC,OAAO;gBACvB,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,WAAW,EAAE,MAAM,CAAC,WAAW;gBAC/B,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;gBACzC,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,eAAe,EAAE,MAAM,CAAC,eAAe;gBACvC,YAAY,EAAE,MAAM,CAAC,YAAY;gBACjC,mBAAmB,EAAE,MAAM,CAAC,mBAAmB;aAChD,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI;YACV,WAAW,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;gBAC7B,KAAK,EAAE,IAAI,CAAC,UAAU,CAAC,KAAK;gBAC5B,aAAa,EAAE,IAAI,CAAC,UAAU,CAAC,YAAY;gBAC3C,eAAe,EAAE,IAAI,CAAC,UAAU,CAAC,cAAc;gBAC/C,OAAO,EAAE,IAAI,CAAC,UAAU,CAAC,OAAO;gBAChC,iBAAiB,EAAE,IAAI,CAAC,UAAU,CAAC,gBAAgB;gBACnD,mBAAmB,EAAE,IAAI,CAAC,UAAU,CAAC,kBAAkB;gBACvD,YAAY,EAAE,IAAI,CAAC,UAAU,CAAC,WAAW;gBACzC,cAAc,EAAE,IAAI,CAAC,UAAU,CAAC,aAAa;gBAC7C,IAAI,EAAE,SAAS;gBACf,UAAU,EAAE,WAAW,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,EAAE;gBACvE,aAAa,EAAE,CAAC;gBAChB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACnC,QAAQ,EAAE,SAAS,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,EAAE;aACpE,CAAC,CAAC,CAAC,IAAI;YACR,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,IAAI;SAC1B,CAAC;IACJ,CAAC;CACF;AAvDD,sCAuDC;AAED;;GAEG;AACH,MAAa,oBAAoB;IAK/B,UAAU,CAAC,UAAmB;QAC5B,IAAI,CAAC,WAAW,GAAG,UAAU,CAAC;QAC9B,OAAO,IAAI,CAAC;IACd,CAAC;IAED,WAAW,CAAC,WAAyB;QACnC,IAAI,CAAC,YAAY,GAAG,WAAW,C
AAC;QAChC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,KAAK,CAAC,KAAa;QACjB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACpB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,KAAK;QACH,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;QACjE,CAAC;QAED,OAAO,IAAI,aAAa,CAAC;YACvB,UAAU,EAAE,IAAI,CAAC,WAAW;YAC5B,WAAW,EAAE,IAAI,CAAC,YAAY;YAC9B,KAAK,EAAE,IAAI,CAAC,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;CACF;AA/BD,oDA+BC"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import { Example } from './data/example';
|
|
2
|
+
import { APIJudgmentScorer, JudgevalScorer } from './scorers/base-scorer';
|
|
3
|
+
import { Rule } from './rules';
|
|
4
|
+
/**
|
|
5
|
+
* Stores example and evaluation scorers together for running an eval task
|
|
6
|
+
*/
|
|
7
|
+
export interface EvaluationRunOptions {
    /** Whether results are logged; a project and eval name are then required. */
    logResults?: boolean;
    /** Organization identifier. */
    organizationId?: string;
    /** Project name. */
    projectName?: string;
    /** Evaluation run name. */
    evalName?: string;
    /** Examples to evaluate. */
    examples: Example[];
    /** Scorers to apply to the examples. */
    scorers: (APIJudgmentScorer | JudgevalScorer)[];
    /** A model name, a list of model names, or a judge object. */
    model: string | string[] | any;
    /** Aggregator model name, used alongside a list of models. */
    aggregator?: string;
    /** Arbitrary metadata attached to the run. */
    metadata?: Record<string, any>;
    /** API key for the Judgment service. */
    judgmentApiKey?: string;
    /** Override flag. NOTE(review): its effect is not visible in this file. */
    override?: boolean;
    /** Rules attached to the run. */
    rules?: Rule[];
}
|
|
21
|
+
export declare class EvaluationRun {
    /** Whether results are logged. */
    logResults: boolean;
    /** Organization identifier. */
    organizationId?: string;
    /** Project name. */
    projectName?: string;
    /** Evaluation run name. */
    evalName?: string;
    /** Examples to evaluate. */
    examples: Example[];
    /** Scorers to apply to the examples. */
    scorers: (APIJudgmentScorer | JudgevalScorer)[];
    /** A model name, a list of model names, or a judge object. */
    model: string | string[] | any;
    /** Aggregator model name, used alongside a list of models. */
    aggregator?: string;
    /** Arbitrary metadata attached to the run. */
    metadata?: Record<string, any>;
    /** API key for the Judgment service. */
    judgmentApiKey?: string;
    /** Override flag. */
    override?: boolean;
    /** Rules attached to the run. */
    rules?: Rule[];
    /** Create the run from its options object. */
    constructor(options: EvaluationRunOptions);
    /**
     * Check the evaluation run configuration.
     */
    private validate;
    /**
     * Produce the plain-object form of the evaluation run.
     */
    toJSON(): Record<string, any>;
}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.EvaluationRun = void 0;
|
|
4
|
+
const example_1 = require("./data/example");
|
|
5
|
+
const base_scorer_1 = require("./scorers/base-scorer");
|
|
6
|
+
const constants_1 = require("./constants");
|
|
7
|
+
/**
 * Stores examples and evaluation scorers together for running an eval task.
 * The configuration is validated as soon as the run is constructed.
 */
class EvaluationRun {
    /**
     * @param {object} options - Run configuration. `logResults` and
     *   `override` default to `false` and `judgmentApiKey` to `''` when
     *   missing or falsy; every other field is stored as given.
     * @throws {Error} When the configuration fails validation.
     */
    constructor(options) {
        this.logResults = options.logResults || false;
        this.organizationId = options.organizationId;
        this.projectName = options.projectName;
        this.evalName = options.evalName;
        this.examples = options.examples;
        this.scorers = options.scorers;
        this.model = options.model;
        this.aggregator = options.aggregator;
        this.metadata = options.metadata;
        this.judgmentApiKey = options.judgmentApiKey || '';
        this.override = options.override || false;
        this.rules = options.rules;
        // Validate
        this.validate();
    }
    /**
     * Validate the evaluation run configuration.
     *
     * Checks, in order: the logging flag and the names it requires, the
     * examples, the scorers, the model (string, list of strings, or any other
     * non-array object, which is treated as a judge instance) and the
     * aggregator.
     *
     * @throws {Error} On the first check that fails.
     */
    validate() {
        // Validate log_results
        if (typeof this.logResults !== 'boolean') {
            throw new Error(`logResults must be a boolean. Received ${this.logResults} of type ${typeof this.logResults}`);
        }
        // Validate project_name
        if (this.logResults && !this.projectName) {
            throw new Error('Project name is required when logResults is true. Please include the projectName argument.');
        }
        // Validate eval_name
        if (this.logResults && !this.evalName) {
            throw new Error('Eval name is required when logResults is true. Please include the evalName argument.');
        }
        // Validate examples
        if (!this.examples || this.examples.length === 0) {
            throw new Error('Examples cannot be empty.');
        }
        for (const example of this.examples) {
            if (!(example instanceof example_1.Example)) {
                throw new Error(`Invalid type for Example: ${typeof example}`);
            }
        }
        // Validate scorers
        if (!this.scorers || this.scorers.length === 0) {
            throw new Error('Scorers cannot be empty.');
        }
        for (const scorer of this.scorers) {
            if (!(scorer instanceof base_scorer_1.APIJudgmentScorer) && !(scorer instanceof base_scorer_1.JudgevalScorer)) {
                throw new Error(`Invalid type for Scorer: ${typeof scorer}`);
            }
        }
        // Validate model. An empty list is truthy, so it is rejected here
        // explicitly instead of slipping through the per-element checks below.
        if (!this.model || (Array.isArray(this.model) && this.model.length === 0)) {
            throw new Error('Model cannot be empty.');
        }
        if (typeof this.model === 'string') {
            // A single model name
            if (!constants_1.ACCEPTABLE_MODELS.has(this.model)) {
                throw new Error(`Model name ${this.model} not recognized. Please select a valid model name.`);
            }
        }
        else if (Array.isArray(this.model)) {
            // A list of model names
            if (!this.model.every(m => typeof m === 'string')) {
                throw new Error('When providing a list of models, all elements must be strings');
            }
            for (const m of this.model) {
                if (!constants_1.ACCEPTABLE_MODELS.has(m)) {
                    throw new Error(`Model name ${m} not recognized. Please select a valid model name.`);
                }
            }
        }
        else if (typeof this.model === 'object') {
            // Any other object is treated as a JudgevalJudge (implemented
            // separately); it may only be combined with JudgevalScorer scorers.
            if (!this.scorers.every(s => s instanceof base_scorer_1.JudgevalScorer)) {
                throw new Error('When using a JudgevalJudge model, all scorers must be JudgevalScorer type');
            }
        }
        else {
            throw new Error(`Model must be one of: string, list of strings, or JudgevalJudge instance. Received type ${typeof this.model}.`);
        }
        // Validate aggregator
        if (Array.isArray(this.model) && !this.aggregator) {
            throw new Error('Aggregator cannot be empty when using multiple models.');
        }
        if (this.aggregator && !constants_1.ACCEPTABLE_MODELS.has(this.aggregator)) {
            throw new Error(`Model name ${this.aggregator} not recognized.`);
        }
    }
    /**
     * Convert the evaluation run to a plain object.
     *
     * Examples and rules are serialized through their own `toJSON` when they
     * have one. Scorers use `toJSON`, then `toDict`, and otherwise reduce to
     * `{ score_type, threshold }`. The `rules` key is present only when rules
     * were supplied.
     *
     * @returns {object} Plain object with snake_case keys.
     */
    toJSON() {
        const data = {
            log_results: this.logResults,
            organization_id: this.organizationId,
            project_name: this.projectName,
            eval_name: this.evalName,
            examples: this.examples.map(example => example.toJSON ? example.toJSON() : example),
            scorers: this.scorers.map(scorer => {
                if ('toJSON' in scorer) {
                    return scorer.toJSON();
                }
                else if ('toDict' in scorer) {
                    return scorer.toDict();
                }
                else {
                    return {
                        score_type: scorer.scoreType,
                        threshold: scorer.threshold
                    };
                }
            }),
            model: this.model,
            aggregator: this.aggregator,
            metadata: this.metadata,
            judgment_api_key: this.judgmentApiKey,
            override: this.override
        };
        if (this.rules) {
            // Process rules to ensure proper serialization
            data.rules = this.rules.map(rule => rule.toJSON ? rule.toJSON() : rule);
        }
        return data;
    }
}
|
|
135
|
+
exports.EvaluationRun = EvaluationRun;
|
|
136
|
+
//# sourceMappingURL=evaluation-run.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluation-run.js","sourceRoot":"","sources":["../src/evaluation-run.ts"],"names":[],"mappings":";;;AAAA,4CAAyC;AACzC,uDAAkF;AAElF,2CAAgD;AAoBhD,MAAa,aAAa;IAcxB,YAAY,OAA6B;QACvC,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,KAAK,CAAC;QAC9C,IAAI,CAAC,cAAc,GAAG,OAAO,CAAC,cAAc,CAAC;QAC7C,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;QACvC,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;QACjC,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;QACjC,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC;QAC/B,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC3B,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;QACjC,IAAI,CAAC,cAAc,GAAG,OAAO,CAAC,cAAc,IAAI,EAAE,CAAC;QACnD,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,KAAK,CAAC;QAC1C,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAE3B,WAAW;QACX,IAAI,CAAC,QAAQ,EAAE,CAAC;IAClB,CAAC;IAED;;OAEG;IACK,QAAQ;QACd,uBAAuB;QACvB,IAAI,OAAO,IAAI,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;YACzC,MAAM,IAAI,KAAK,CAAC,0CAA0C,IAAI,CAAC,UAAU,YAAY,OAAO,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;QACjH,CAAC;QAED,wBAAwB;QACxB,IAAI,IAAI,CAAC,UAAU,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACzC,MAAM,IAAI,KAAK,CAAC,4FAA4F,CAAC,CAAC;QAChH,CAAC;QAED,qBAAqB;QACrB,IAAI,IAAI,CAAC,UAAU,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,sFAAsF,CAAC,CAAC;QAC1G,CAAC;QAED,oBAAoB;QACpB,IAAI,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACjD,MAAM,IAAI,KAAK,CAAC,2BAA2B,CAAC,CAAC;QAC/C,CAAC;QACD,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YACpC,IAAI,CAAC,CAAC,OAAO,YAAY,iBAAO,CAAC,EAAE,CAAC;gBAClC,MAAM,IAAI,KAAK,CAAC,6BAA6B,OAAO,OAAO,EAAE,CAAC,CAAC;YACjE,CAAC;QACH,CAAC;QAED,mBAAmB;QACnB,IAAI,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC/C,MAAM,IAAI,KAAK,CAAC,0BAA0B,CAAC,CAAC;QAC9C,CAAC;QACD,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YAClC,IAAI,CAAC,CAAC,MAAM,YAAY,+BAAiB,CAAC,IAAI,CAAC,CAAC,MAAM,YAAY,4BAAc,CAAC,EAAE,CAAC;gBAClF,MAAM,IAAI,KAAK,CAAC,4BAA4B,OAAO,MAAM,EAAE,CAAC,CAAC;YAC/D,CAAC;QACH,CAAC;QAED,iBAAiB;QACjB,IAAI,CAAC,IAAI,CAAC,KAA
K,EAAE,CAAC;YAChB,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;QAC5C,CAAC;QAED,sEAAsE;QACtE,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YACxF,iEAAiE;YACjE,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,YAAY,4BAAc,CAAC,EAAE,CAAC;gBAC1D,MAAM,IAAI,KAAK,CAAC,2EAA2E,CAAC,CAAC;YAC/F,CAAC;QACH,CAAC;aAAM,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC1C,6BAA6B;YAC7B,IAAI,CAAC,6BAAiB,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBACvC,MAAM,IAAI,KAAK,CAAC,cAAc,IAAI,CAAC,KAAK,oDAAoD,CAAC,CAAC;YAChG,CAAC;QACH,CAAC;aAAM,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YACrC,wCAAwC;YACxC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,EAAE,CAAC;gBAClD,MAAM,IAAI,KAAK,CAAC,+DAA+D,CAAC,CAAC;YACnF,CAAC;YACD,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;gBAC3B,IAAI,CAAC,6BAAiB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC9B,MAAM,IAAI,KAAK,CAAC,cAAc,CAAC,oDAAoD,CAAC,CAAC;gBACvF,CAAC;YACH,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,IAAI,KAAK,CAAC,2FAA2F,OAAO,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC;QACnI,CAAC;QAED,sBAAsB;QACtB,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;YAClD,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;QAC5E,CAAC;QACD,IAAI,IAAI,CAAC,UAAU,IAAI,CAAC,6BAAiB,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;YAC/D,MAAM,IAAI,KAAK,CAAC,cAAc,IAAI,CAAC,UAAU,kBAAkB,CAAC,CAAC;QACnE,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,MAAM;QACJ,MAAM,IAAI,GAAwB;YAChC,WAAW,EAAE,IAAI,CAAC,UAAU;YAC5B,eAAe,EAAE,IAAI,CAAC,cAAc;YACpC,YAAY,EAAE,IAAI,CAAC,WAAW;YAC9B,SAAS,EAAE,IAAI,CAAC,QAAQ;YACxB,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;YACnF,OAAO,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE;gBACjC,IAAI,QAAQ,IAAI,MAAM,EAAE,CAAC;oBACvB,OAAO,MAAM,CAAC,MAAM,EAAE,CAAC;gBACzB,CAAC;qBAAM,IAAI,QAAQ,IAAI,MAAM,EAAE,CAAC;oBAC9B,OAAQ,MAAc,CAAC,MAAM,EAAE,CAAC;gBAClC,CAAC;qBAAM,CAAC;oBACN,OAAO;wBA
CL,UAAU,EAAG,MAAiB,CAAC,SAAS;wBACxC,SAAS,EAAG,MAAiB,CAAC,SAAS;qBACxC,CAAC;gBACJ,CAAC;YACH,CAAC,CAAC;YACF,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,gBAAgB,EAAE,IAAI,CAAC,cAAc;YACrC,QAAQ,EAAE,IAAI,CAAC,QAAQ;SACxB,CAAC;QAEF,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,+CAA+C;YAC/C,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC1E,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;CACF;AAnJD,sCAmJC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export { Example, ExampleBuilder, ExampleOptions } from './data/example';
|
|
2
|
+
export { ScoringResult, ScoringResultBuilder, ScorerData, ScoringResultOptions } from './data/result';
|
|
3
|
+
export { Scorer, APIJudgmentScorer, JudgevalScorer, ScorerWrapper } from './scorers/base-scorer';
|
|
4
|
+
export { AnswerCorrectnessScorer, AnswerRelevancyScorer, ComparisonScorer, ContextualPrecisionScorer, ContextualRecallScorer, ContextualRelevancyScorer, ExecutionOrderScorer, FaithfulnessScorer, GroundednessScorer, HallucinationScorer, InstructionAdherenceScorer, JsonCorrectnessScorer, SummarizationScorer } from './scorers/api-scorer';
|
|
5
|
+
export { AlertStatus, Condition, NotificationConfig, Rule, AlertResult, RulesEngine } from './rules';
|
|
6
|
+
export { EvaluationRun, EvaluationRunOptions } from './evaluation-run';
|
|
7
|
+
export { runEval, assertTest, JudgmentAPIError, sendToRabbitMQ, executeApiEval, mergeResults, checkMissingScorerData, checkEvalRunNameExists, logEvaluationResults, checkExamples } from './run-evaluation';
|
|
8
|
+
export { JudgmentClient } from './judgment-client';
|
|
9
|
+
export * from './constants';
|
|
10
|
+
export * from './clients';
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Re-export helper: makes `target[alias]` mirror `source[key]`. An existing
// helper on `this` is reused when present. Where Object.create exists the
// binding is installed with defineProperty, as a live getter unless the source
// descriptor can be copied as-is; otherwise it is a plain assignment.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter = !descriptor || ("get" in descriptor
            ? !source.__esModule
            : descriptor.writable || descriptor.configurable);
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportName, descriptor);
    }
    : function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        target[exportName] = source[key];
    });
|
|
13
|
+
// `export *` helper: binds every enumerable key of module `m` onto `exports`,
// skipping "default" and any name `exports` already owns. An existing helper
// on `this` is reused when present.
var __exportStar = (this && this.__exportStar) || function (m, exports) {
    var ownsKey = Object.prototype.hasOwnProperty;
    for (var name in m) {
        if (name === "default") {
            continue;
        }
        if (ownsKey.call(exports, name)) {
            continue;
        }
        __createBinding(exports, m, name);
    }
};
|
|
16
|
+
// Package entry point: wires the public API onto `exports`.
// Each named export below is an enumerable getter that reads from the required
// module at access time, so the value is looked up lazily rather than copied.
Object.defineProperty(exports, "__esModule", { value: true });
// Every named export is first assigned `undefined` (`void 0`) in one chained
// statement; the getters defined further down then replace these placeholders.
// NOTE(review): presumably emitted so export names are statically visible; confirm.
exports.JudgmentClient = exports.checkExamples = exports.logEvaluationResults = exports.checkEvalRunNameExists = exports.checkMissingScorerData = exports.mergeResults = exports.executeApiEval = exports.sendToRabbitMQ = exports.JudgmentAPIError = exports.assertTest = exports.runEval = exports.EvaluationRun = exports.RulesEngine = exports.AlertResult = exports.Rule = exports.NotificationConfig = exports.Condition = exports.AlertStatus = exports.SummarizationScorer = exports.JsonCorrectnessScorer = exports.InstructionAdherenceScorer = exports.HallucinationScorer = exports.GroundednessScorer = exports.FaithfulnessScorer = exports.ExecutionOrderScorer = exports.ContextualRelevancyScorer = exports.ContextualRecallScorer = exports.ContextualPrecisionScorer = exports.ComparisonScorer = exports.AnswerRelevancyScorer = exports.AnswerCorrectnessScorer = exports.ScorerWrapper = exports.JudgevalScorer = exports.APIJudgmentScorer = exports.ScoringResultBuilder = exports.ScoringResult = exports.ExampleBuilder = exports.Example = void 0;
// Export data models (from ./data/example and ./data/result)
var example_1 = require("./data/example");
Object.defineProperty(exports, "Example", { enumerable: true, get: function () { return example_1.Example; } });
Object.defineProperty(exports, "ExampleBuilder", { enumerable: true, get: function () { return example_1.ExampleBuilder; } });
var result_1 = require("./data/result");
Object.defineProperty(exports, "ScoringResult", { enumerable: true, get: function () { return result_1.ScoringResult; } });
Object.defineProperty(exports, "ScoringResultBuilder", { enumerable: true, get: function () { return result_1.ScoringResultBuilder; } });
// Export scorers (from ./scorers/base-scorer and ./scorers/api-scorer)
var base_scorer_1 = require("./scorers/base-scorer");
Object.defineProperty(exports, "APIJudgmentScorer", { enumerable: true, get: function () { return base_scorer_1.APIJudgmentScorer; } });
Object.defineProperty(exports, "JudgevalScorer", { enumerable: true, get: function () { return base_scorer_1.JudgevalScorer; } });
Object.defineProperty(exports, "ScorerWrapper", { enumerable: true, get: function () { return base_scorer_1.ScorerWrapper; } });
var api_scorer_1 = require("./scorers/api-scorer");
Object.defineProperty(exports, "AnswerCorrectnessScorer", { enumerable: true, get: function () { return api_scorer_1.AnswerCorrectnessScorer; } });
Object.defineProperty(exports, "AnswerRelevancyScorer", { enumerable: true, get: function () { return api_scorer_1.AnswerRelevancyScorer; } });
Object.defineProperty(exports, "ComparisonScorer", { enumerable: true, get: function () { return api_scorer_1.ComparisonScorer; } });
Object.defineProperty(exports, "ContextualPrecisionScorer", { enumerable: true, get: function () { return api_scorer_1.ContextualPrecisionScorer; } });
Object.defineProperty(exports, "ContextualRecallScorer", { enumerable: true, get: function () { return api_scorer_1.ContextualRecallScorer; } });
Object.defineProperty(exports, "ContextualRelevancyScorer", { enumerable: true, get: function () { return api_scorer_1.ContextualRelevancyScorer; } });
Object.defineProperty(exports, "ExecutionOrderScorer", { enumerable: true, get: function () { return api_scorer_1.ExecutionOrderScorer; } });
Object.defineProperty(exports, "FaithfulnessScorer", { enumerable: true, get: function () { return api_scorer_1.FaithfulnessScorer; } });
Object.defineProperty(exports, "GroundednessScorer", { enumerable: true, get: function () { return api_scorer_1.GroundednessScorer; } });
Object.defineProperty(exports, "HallucinationScorer", { enumerable: true, get: function () { return api_scorer_1.HallucinationScorer; } });
Object.defineProperty(exports, "InstructionAdherenceScorer", { enumerable: true, get: function () { return api_scorer_1.InstructionAdherenceScorer; } });
Object.defineProperty(exports, "JsonCorrectnessScorer", { enumerable: true, get: function () { return api_scorer_1.JsonCorrectnessScorer; } });
Object.defineProperty(exports, "SummarizationScorer", { enumerable: true, get: function () { return api_scorer_1.SummarizationScorer; } });
// Export rules system (from ./rules)
var rules_1 = require("./rules");
Object.defineProperty(exports, "AlertStatus", { enumerable: true, get: function () { return rules_1.AlertStatus; } });
Object.defineProperty(exports, "Condition", { enumerable: true, get: function () { return rules_1.Condition; } });
Object.defineProperty(exports, "NotificationConfig", { enumerable: true, get: function () { return rules_1.NotificationConfig; } });
Object.defineProperty(exports, "Rule", { enumerable: true, get: function () { return rules_1.Rule; } });
Object.defineProperty(exports, "AlertResult", { enumerable: true, get: function () { return rules_1.AlertResult; } });
Object.defineProperty(exports, "RulesEngine", { enumerable: true, get: function () { return rules_1.RulesEngine; } });
// Export evaluation components (from ./evaluation-run and ./run-evaluation)
var evaluation_run_1 = require("./evaluation-run");
Object.defineProperty(exports, "EvaluationRun", { enumerable: true, get: function () { return evaluation_run_1.EvaluationRun; } });
var run_evaluation_1 = require("./run-evaluation");
Object.defineProperty(exports, "runEval", { enumerable: true, get: function () { return run_evaluation_1.runEval; } });
Object.defineProperty(exports, "assertTest", { enumerable: true, get: function () { return run_evaluation_1.assertTest; } });
Object.defineProperty(exports, "JudgmentAPIError", { enumerable: true, get: function () { return run_evaluation_1.JudgmentAPIError; } });
Object.defineProperty(exports, "sendToRabbitMQ", { enumerable: true, get: function () { return run_evaluation_1.sendToRabbitMQ; } });
Object.defineProperty(exports, "executeApiEval", { enumerable: true, get: function () { return run_evaluation_1.executeApiEval; } });
Object.defineProperty(exports, "mergeResults", { enumerable: true, get: function () { return run_evaluation_1.mergeResults; } });
Object.defineProperty(exports, "checkMissingScorerData", { enumerable: true, get: function () { return run_evaluation_1.checkMissingScorerData; } });
Object.defineProperty(exports, "checkEvalRunNameExists", { enumerable: true, get: function () { return run_evaluation_1.checkEvalRunNameExists; } });
Object.defineProperty(exports, "logEvaluationResults", { enumerable: true, get: function () { return run_evaluation_1.logEvaluationResults; } });
Object.defineProperty(exports, "checkExamples", { enumerable: true, get: function () { return run_evaluation_1.checkExamples; } });
// Export client (from ./judgment-client)
var judgment_client_1 = require("./judgment-client");
Object.defineProperty(exports, "JudgmentClient", { enumerable: true, get: function () { return judgment_client_1.JudgmentClient; } });
// Export constants: every non-default member of ./constants that `exports`
// does not already own is re-exported via __exportStar.
__exportStar(require("./constants"), exports);
// Export clients: same star re-export for ./clients. Names already defined
// above (including any taken from ./constants) are skipped by __exportStar.
__exportStar(require("./clients"), exports);
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;AAAA,qBAAqB;AACrB,0CAAyE;AAAhE,kGAAA,OAAO,OAAA;AAAE,yGAAA,cAAc,OAAA;AAChC,wCAAsG;AAA7F,uGAAA,aAAa,OAAA;AAAE,8GAAA,oBAAoB,OAAA;AAE5C,iBAAiB;AACjB,qDAK+B;AAH7B,gHAAA,iBAAiB,OAAA;AACjB,6GAAA,cAAc,OAAA;AACd,4GAAA,aAAa,OAAA;AAGf,mDAc8B;AAb5B,qHAAA,uBAAuB,OAAA;AACvB,mHAAA,qBAAqB,OAAA;AACrB,8GAAA,gBAAgB,OAAA;AAChB,uHAAA,yBAAyB,OAAA;AACzB,oHAAA,sBAAsB,OAAA;AACtB,uHAAA,yBAAyB,OAAA;AACzB,kHAAA,oBAAoB,OAAA;AACpB,gHAAA,kBAAkB,OAAA;AAClB,gHAAA,kBAAkB,OAAA;AAClB,iHAAA,mBAAmB,OAAA;AACnB,wHAAA,0BAA0B,OAAA;AAC1B,mHAAA,qBAAqB,OAAA;AACrB,iHAAA,mBAAmB,OAAA;AAGrB,sBAAsB;AACtB,iCAOiB;AANf,oGAAA,WAAW,OAAA;AACX,kGAAA,SAAS,OAAA;AACT,2GAAA,kBAAkB,OAAA;AAClB,6FAAA,IAAI,OAAA;AACJ,oGAAA,WAAW,OAAA;AACX,oGAAA,WAAW,OAAA;AAGb,+BAA+B;AAC/B,mDAAuE;AAA9D,+GAAA,aAAa,OAAA;AACtB,mDAW0B;AAVxB,yGAAA,OAAO,OAAA;AACP,4GAAA,UAAU,OAAA;AACV,kHAAA,gBAAgB,OAAA;AAChB,gHAAA,cAAc,OAAA;AACd,gHAAA,cAAc,OAAA;AACd,8GAAA,YAAY,OAAA;AACZ,wHAAA,sBAAsB,OAAA;AACtB,wHAAA,sBAAsB,OAAA;AACtB,sHAAA,oBAAoB,OAAA;AACpB,+GAAA,aAAa,OAAA;AAGf,gBAAgB;AAChB,qDAAmD;AAA1C,iHAAA,cAAc,OAAA;AAEvB,mBAAmB;AACnB,8CAA4B;AAE5B,iBAAiB;AACjB,4CAA0B"}
|