@orq-ai/evaluators 1.0.0-12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +2 -0
- package/dist/lib/comparison-evaluators.d.ts +26 -0
- package/dist/lib/comparison-evaluators.d.ts.map +1 -0
- package/dist/lib/comparison-evaluators.js +148 -0
- package/dist/lib/cosine-similarity-evaluator.d.ts +62 -0
- package/dist/lib/cosine-similarity-evaluator.d.ts.map +1 -0
- package/dist/lib/cosine-similarity-evaluator.js +162 -0
- package/dist/lib/evaluators.d.ts +2 -0
- package/dist/lib/evaluators.d.ts.map +1 -0
- package/dist/lib/evaluators.js +3 -0
- package/dist/lib/json-evaluators.d.ts +25 -0
- package/dist/lib/json-evaluators.d.ts.map +1 -0
- package/dist/lib/json-evaluators.js +177 -0
- package/dist/lib/string-evaluators.d.ts +30 -0
- package/dist/lib/string-evaluators.d.ts.map +1 -0
- package/dist/lib/string-evaluators.js +168 -0
- package/dist/lib/vector-utils.d.ts +27 -0
- package/dist/lib/vector-utils.d.ts.map +1 -0
- package/dist/lib/vector-utils.js +54 -0
- package/dist/tsconfig.lib.tsbuildinfo +1 -0
- package/package.json +55 -0
package/README.md
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# @orq-ai/evaluators
|
|
2
|
+
|
|
3
|
+
Reusable evaluators for AI evaluation frameworks. This package provides a collection of pre-built evaluators that can be imported and used in your `.eval` files.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install @orq-ai/evaluators
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
### Cosine Similarity Evaluator
|
|
14
|
+
|
|
15
|
+
Compare semantic similarity between output and expected text using OpenAI embeddings:
|
|
16
|
+
|
|
17
|
+
```typescript
|
|
18
|
+
import {
|
|
19
|
+
cosineSimilarityEvaluator,
|
|
20
|
+
cosineSimilarityThresholdEvaluator,
|
|
21
|
+
simpleCosineSimilarity
|
|
22
|
+
} from "@orq-ai/evaluators";
|
|
23
|
+
|
|
24
|
+
// Simple usage - returns similarity score (0-1)
|
|
25
|
+
const evaluator = simpleCosineSimilarity("The capital of France is Paris");
|
|
26
|
+
|
|
27
|
+
// With threshold - returns boolean based on threshold
|
|
28
|
+
const thresholdEvaluator = cosineSimilarityThresholdEvaluator({
|
|
29
|
+
expectedText: "The capital of France is Paris",
|
|
30
|
+
threshold: 0.8,
|
|
31
|
+
name: "semantic-match"
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
// Advanced configuration
|
|
35
|
+
const customEvaluator = cosineSimilarityEvaluator({
|
|
36
|
+
expectedText: "Expected output text",
|
|
37
|
+
model: "text-embedding-3-large", // optional: custom embedding model
|
|
38
|
+
name: "custom-similarity"
|
|
39
|
+
});
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
#### Environment Variables
|
|
43
|
+
|
|
44
|
+
The cosine similarity evaluator requires one of the following environment variables (if both are set, `ORQ_API_KEY` takes precedence):
|
|
45
|
+
- `OPENAI_API_KEY` - For direct OpenAI API access
|
|
46
|
+
- `ORQ_API_KEY` - For Orq proxy access (automatically uses `https://api.orq.ai/v2/proxy`)
|
|
47
|
+
|
|
48
|
+
When using the Orq proxy, model names should be prefixed with `openai/` (e.g., `openai/text-embedding-3-small`).
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,sCAAsC,CAAC;AACrD,cAAc,uBAAuB,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { Evaluator } from "@orq-ai/evaluatorq";
|
|
2
|
+
/**
|
|
3
|
+
* Validates exact equality between output and expected output
|
|
4
|
+
*/
|
|
5
|
+
export declare const exactMatch: Evaluator;
|
|
6
|
+
/**
|
|
7
|
+
* Validates fuzzy equality (case-insensitive, trimmed strings)
|
|
8
|
+
*/
|
|
9
|
+
export declare const fuzzyMatch: Evaluator;
|
|
10
|
+
/**
|
|
11
|
+
* Validates that a numeric output is within a tolerance of the expected value
|
|
12
|
+
*/
|
|
13
|
+
export declare function withinTolerance(tolerance: number): Evaluator;
|
|
14
|
+
/**
|
|
15
|
+
* Validates that a numeric output is greater than a threshold
|
|
16
|
+
*/
|
|
17
|
+
export declare function greaterThan(threshold: number): Evaluator;
|
|
18
|
+
/**
|
|
19
|
+
* Validates that a numeric output is less than a threshold
|
|
20
|
+
*/
|
|
21
|
+
export declare function lessThan(threshold: number): Evaluator;
|
|
22
|
+
/**
|
|
23
|
+
* Validates that a numeric output is within a range (inclusive)
|
|
24
|
+
*/
|
|
25
|
+
export declare function inRange(min: number, max: number): Evaluator;
|
|
26
|
+
//# sourceMappingURL=comparison-evaluators.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"comparison-evaluators.d.ts","sourceRoot":"","sources":["../../src/lib/comparison-evaluators.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAEpD;;GAEG;AACH,eAAO,MAAM,UAAU,EAAE,SAmBxB,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,UAAU,EAAE,SAqBxB,CAAC;AAEF;;GAEG;AACH,wBAAgB,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,SAAS,CAgC5D;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,SAAS,EAAE,MAAM,GAAG,SAAS,CAuBxD;AAED;;GAEG;AACH,wBAAgB,QAAQ,CAAC,SAAS,EAAE,MAAM,GAAG,SAAS,CAuBrD;AAED;;GAEG;AACH,wBAAgB,OAAO,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,SAAS,CAuB3D"}
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
/**
 * Evaluator that passes when the output and the expected output serialize to
 * the same JSON text.
 *
 * When `data.expectedOutput` is `undefined` there is nothing to compare
 * against, so the check is skipped and reported as passing.
 */
export const exactMatch = {
  name: "exact-match",
  scorer: async ({ output, data }) => {
    const expected = data.expectedOutput;
    if (expected === undefined) {
      return {
        value: true,
        explanation: "No expected output provided, skipping validation",
      };
    }

    // The comparison is made on the serialized form, so property order matters.
    const isSame = JSON.stringify(output) === JSON.stringify(expected);
    const explanation = isSame
      ? "Output exactly matches expected"
      : "Output does not match expected";
    return { value: isSame, explanation };
  },
};
|
|
22
|
+
/**
 * Evaluator that passes when the output and the expected output are equal
 * after being converted to text, trimmed and lower-cased.
 *
 * When `data.expectedOutput` is `undefined` the check is skipped and reported
 * as passing.
 *
 * Object and array values are converted with `JSON.stringify` rather than
 * `String()`: `String({})` is always "[object Object]", which would make any
 * two objects compare equal.
 */
export const fuzzyMatch = {
  name: "fuzzy-match",
  scorer: async ({ output, data }) => {
    if (data.expectedOutput === undefined) {
      return {
        value: true,
        explanation: "No expected output provided, skipping validation",
      };
    }

    // Turns any value into comparable text without losing object contents.
    const toText = (value) => {
      if (typeof value !== "object") {
        return String(value);
      }
      try {
        const json = JSON.stringify(value);
        // JSON.stringify may return undefined (e.g. a toJSON() that yields it).
        return typeof json === "string" ? json : String(value);
      } catch {
        // Circular structures or BigInt members cannot be serialized.
        return String(value);
      }
    };

    const outputStr = toText(output).trim().toLowerCase();
    const expectedStr = toText(data.expectedOutput).trim().toLowerCase();
    const matches = outputStr === expectedStr;
    return {
      value: matches,
      explanation: matches
        ? "Output matches expected (case-insensitive)"
        : "Output does not match expected",
    };
  },
};
|
|
45
|
+
/**
 * Creates an evaluator that passes when the numeric output is within
 * `tolerance` (inclusive) of the numeric expected output.
 *
 * - The check is skipped (reported as passing) when `data.expectedOutput` is
 *   `undefined`.
 * - The check fails when either value is not a valid number. `null` and blank
 *   strings are rejected explicitly, because `Number(null)` and `Number("")`
 *   are both `0` and would otherwise be scored as a real zero.
 *
 * @param tolerance Maximum allowed absolute difference
 * @returns An evaluator named `within-tolerance-<tolerance>`
 */
export function withinTolerance(tolerance) {
  // Converts a raw value to a number, mapping null/blank input to NaN.
  const toNumber = (raw) => {
    const isBlank =
      raw === null || (typeof raw === "string" && raw.trim() === "");
    return isBlank ? Number.NaN : Number(raw);
  };

  return {
    name: `within-tolerance-${tolerance}`,
    scorer: async ({ output, data }) => {
      if (data.expectedOutput === undefined) {
        return {
          value: true,
          explanation: "No expected output provided, skipping validation",
        };
      }

      const outputNum = toNumber(output);
      const expectedNum = toNumber(data.expectedOutput);
      if (Number.isNaN(outputNum) || Number.isNaN(expectedNum)) {
        return {
          value: false,
          explanation: "Output or expected value is not a valid number",
        };
      }

      const difference = Math.abs(outputNum - expectedNum);
      const isWithinTolerance = difference <= tolerance;
      return {
        value: isWithinTolerance,
        explanation: isWithinTolerance
          ? `Value ${outputNum} is within ${tolerance} of expected ${expectedNum}`
          : `Value ${outputNum} differs by ${difference} from expected ${expectedNum} (tolerance: ${tolerance})`,
      };
    },
  };
}
|
|
77
|
+
/**
 * Creates an evaluator that passes when the numeric output is strictly
 * greater than `threshold`.
 *
 * The check fails when the output is not a valid number. `null` and blank
 * strings are rejected explicitly, because `Number(null)` and `Number("")`
 * are both `0` and would otherwise be compared as a real zero.
 *
 * @param threshold Exclusive lower bound
 * @returns An evaluator named `greater-than-<threshold>`
 */
export function greaterThan(threshold) {
  return {
    name: `greater-than-${threshold}`,
    scorer: async ({ output }) => {
      const isBlank =
        output === null ||
        (typeof output === "string" && output.trim() === "");
      const value = isBlank ? Number.NaN : Number(output);
      if (Number.isNaN(value)) {
        return {
          value: false,
          explanation: "Output is not a valid number",
        };
      }

      const isGreater = value > threshold;
      return {
        value: isGreater,
        explanation: isGreater
          ? `Value ${value} is greater than ${threshold}`
          : `Value ${value} is not greater than ${threshold}`,
      };
    },
  };
}
|
|
101
|
+
/**
 * Creates an evaluator that passes when the numeric output is strictly less
 * than `threshold`.
 *
 * The check fails when the output is not a valid number. `null` and blank
 * strings are rejected explicitly, because `Number(null)` and `Number("")`
 * are both `0` and would otherwise be compared as a real zero.
 *
 * @param threshold Exclusive upper bound
 * @returns An evaluator named `less-than-<threshold>`
 */
export function lessThan(threshold) {
  return {
    name: `less-than-${threshold}`,
    scorer: async ({ output }) => {
      const isBlank =
        output === null ||
        (typeof output === "string" && output.trim() === "");
      const value = isBlank ? Number.NaN : Number(output);
      if (Number.isNaN(value)) {
        return {
          value: false,
          explanation: "Output is not a valid number",
        };
      }

      const isLess = value < threshold;
      return {
        value: isLess,
        explanation: isLess
          ? `Value ${value} is less than ${threshold}`
          : `Value ${value} is not less than ${threshold}`,
      };
    },
  };
}
|
|
125
|
+
/**
 * Creates an evaluator that passes when the numeric output lies within
 * `[min, max]`, both bounds inclusive.
 *
 * The check fails when the output is not a valid number. `null` and blank
 * strings are rejected explicitly, because `Number(null)` and `Number("")`
 * are both `0` and would otherwise be compared as a real zero.
 *
 * @param min Inclusive lower bound
 * @param max Inclusive upper bound
 * @returns An evaluator named `in-range-<min>-<max>`
 */
export function inRange(min, max) {
  return {
    name: `in-range-${min}-${max}`,
    scorer: async ({ output }) => {
      const isBlank =
        output === null ||
        (typeof output === "string" && output.trim() === "");
      const value = isBlank ? Number.NaN : Number(output);
      if (Number.isNaN(value)) {
        return {
          value: false,
          explanation: "Output is not a valid number",
        };
      }

      const isInRange = value >= min && value <= max;
      return {
        value: isInRange,
        explanation: isInRange
          ? `Value ${value} is within range [${min}, ${max}]`
          : `Value ${value} is outside range [${min}, ${max}]`,
      };
    },
  };
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import type { Evaluator } from "@orq-ai/evaluatorq";
|
|
2
|
+
/**
|
|
3
|
+
* Configuration options for the cosine similarity evaluator
|
|
4
|
+
*/
|
|
5
|
+
export interface CosineSimilarityConfig {
|
|
6
|
+
/**
|
|
7
|
+
* The expected text to compare against the output
|
|
8
|
+
*/
|
|
9
|
+
expectedText: string;
|
|
10
|
+
/**
|
|
11
|
+
* The embedding model to use
|
|
12
|
+
* @default "text-embedding-3-small" for OpenAI, "openai/text-embedding-3-small" for Orq
|
|
13
|
+
*/
|
|
14
|
+
model?: string;
|
|
15
|
+
/**
|
|
16
|
+
* Optional name for the evaluator
|
|
17
|
+
* @default "cosine-similarity"
|
|
18
|
+
*/
|
|
19
|
+
name?: string;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Configuration options for the cosine similarity threshold evaluator
|
|
23
|
+
*/
|
|
24
|
+
export interface CosineSimilarityThresholdConfig extends CosineSimilarityConfig {
|
|
25
|
+
/**
|
|
26
|
+
* Threshold for similarity score (0-1)
|
|
27
|
+
* The evaluator will return true if similarity meets the threshold
|
|
28
|
+
*/
|
|
29
|
+
threshold: number;
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Creates a cosine similarity evaluator that returns the raw similarity score
|
|
33
|
+
* between the output and expected text using OpenAI embeddings
|
|
34
|
+
*
|
|
35
|
+
* @example
|
|
36
|
+
* ```typescript
|
|
37
|
+
* const evaluator = cosineSimilarityEvaluator({
|
|
38
|
+
* expectedText: "The capital of France is Paris"
|
|
39
|
+
* });
|
|
40
|
+
* ```
|
|
41
|
+
*/
|
|
42
|
+
export declare function cosineSimilarityEvaluator(config: CosineSimilarityConfig): Evaluator;
|
|
43
|
+
/**
|
|
44
|
+
* Creates a cosine similarity evaluator that returns a boolean based on
|
|
45
|
+
* whether the similarity meets a threshold
|
|
46
|
+
*
|
|
47
|
+
* @example
|
|
48
|
+
* ```typescript
|
|
49
|
+
* const evaluator = cosineSimilarityThresholdEvaluator({
|
|
50
|
+
* expectedText: "The capital of France is Paris",
|
|
51
|
+
* threshold: 0.8
|
|
52
|
+
* });
|
|
53
|
+
* ```
|
|
54
|
+
*/
|
|
55
|
+
export declare function cosineSimilarityThresholdEvaluator(config: CosineSimilarityThresholdConfig): Evaluator;
|
|
56
|
+
/**
|
|
57
|
+
* Creates a simple cosine similarity evaluator with default settings
|
|
58
|
+
* @param expectedText The expected text to compare against
|
|
59
|
+
* @returns An evaluator that returns the cosine similarity score (0-1)
|
|
60
|
+
*/
|
|
61
|
+
export declare function simpleCosineSimilarity(expectedText: string): Evaluator;
|
|
62
|
+
//# sourceMappingURL=cosine-similarity-evaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cosine-similarity-evaluator.d.ts","sourceRoot":"","sources":["../../src/lib/cosine-similarity-evaluator.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAgCpD;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC;;OAEG;IACH,YAAY,EAAE,MAAM,CAAC;IACrB;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED;;GAEG;AACH,MAAM,WAAW,+BACf,SAAQ,sBAAsB;IAC9B;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,yBAAyB,CACvC,MAAM,EAAE,sBAAsB,GAC7B,SAAS,CA2DX;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,kCAAkC,CAChD,MAAM,EAAE,+BAA+B,GACtC,SAAS,CAmEX;AAED;;;;GAIG;AACH,wBAAgB,sBAAsB,CAAC,YAAY,EAAE,MAAM,GAAG,SAAS,CAEtE"}
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import OpenAI from "openai";
|
|
2
|
+
import { cosineSimilarity } from "./vector-utils.js";
|
|
3
|
+
/**
 * Builds the OpenAI SDK client used to request embeddings.
 *
 * `ORQ_API_KEY` is checked first: when it is set, the client is pointed at the
 * Orq proxy (`https://api.orq.ai/v2/proxy`). Otherwise `OPENAI_API_KEY` is
 * used and no base URL override is passed.
 *
 * @throws {Error} If neither ORQ_API_KEY nor OPENAI_API_KEY is set
 */
function createOpenAIClient() {
  const { ORQ_API_KEY: orqKey, OPENAI_API_KEY: openaiKey } = process.env;

  if (!orqKey && !openaiKey) {
    throw new Error("Cosine similarity evaluator requires either ORQ_API_KEY or OPENAI_API_KEY environment variable to be set for embeddings");
  }

  // The Orq key wins when both variables are present.
  const options = orqKey
    ? { baseURL: "https://api.orq.ai/v2/proxy", apiKey: orqKey }
    : { apiKey: openaiKey };
  return new OpenAI(options);
}
|
|
25
|
+
/**
 * Creates a cosine similarity evaluator that returns the raw similarity score
 * between the output and the expected text, using embeddings requested
 * through the OpenAI client.
 *
 * Scoring rules:
 * - `null` / `undefined` output scores `0` without calling the API.
 * - An empty output scores `0` without calling the API, because the
 *   embeddings endpoint rejects empty input.
 * - Object outputs are serialized with `JSON.stringify` before embedding;
 *   `String(obj)` would embed the meaningless text "[object Object]".
 *
 * The client is created lazily on the first scored output, so building the
 * evaluator does not require an API key; scoring a non-empty output does.
 *
 * @param config Expected text plus optional `model` and `name` overrides
 * @returns An evaluator whose scorer resolves to `{ value, explanation }`
 *
 * @example
 * ```typescript
 * const evaluator = cosineSimilarityEvaluator({
 *   expectedText: "The capital of France is Paris"
 * });
 * ```
 */
export function cosineSimilarityEvaluator(config) {
  const { expectedText, model: userModel, name = "cosine-similarity" } = config;

  // Lazy initialization of the OpenAI client.
  let openaiClient = null;
  const getClient = () => {
    if (!openaiClient) {
      openaiClient = createOpenAIClient();
    }
    return openaiClient;
  };

  // An explicit model wins; otherwise the default depends on whether the Orq
  // proxy is in use, since the proxy default carries an `openai/` prefix.
  const getModel = () => {
    if (userModel) {
      return userModel;
    }
    const isOrq = !!process.env.ORQ_API_KEY;
    return isOrq ? "openai/text-embedding-3-small" : "text-embedding-3-small";
  };

  // Converts the output to the text that will be embedded.
  const toText = (value) => {
    if (typeof value !== "object") {
      return String(value);
    }
    try {
      const json = JSON.stringify(value);
      return typeof json === "string" ? json : String(value);
    } catch {
      // Circular structures or BigInt members cannot be serialized.
      return String(value);
    }
  };

  return {
    name,
    scorer: async ({ output }) => {
      if (output === undefined || output === null) {
        return {
          value: 0,
          explanation: "Output is null or undefined",
        };
      }

      const outputText = toText(output);
      if (outputText === "") {
        return {
          value: 0,
          explanation: "Output is empty",
        };
      }

      const client = getClient(); // Throws if no API key is configured
      const model = getModel();

      // Request both embeddings concurrently.
      const [outputEmbedding, expectedEmbedding] = await Promise.all([
        client.embeddings.create({ input: outputText, model }),
        client.embeddings.create({ input: expectedText, model }),
      ]);

      const outputVector = outputEmbedding.data[0].embedding;
      const expectedVector = expectedEmbedding.data[0].embedding;
      const similarity = cosineSimilarity(outputVector, expectedVector);
      return {
        value: similarity,
        explanation: `Cosine similarity: ${similarity.toFixed(3)}`,
      };
    },
  };
}
|
|
88
|
+
/**
 * Creates a cosine similarity evaluator that returns a boolean: `true` when
 * the similarity between the output and the expected text is greater than or
 * equal to `threshold`.
 *
 * Scoring rules:
 * - `null` / `undefined` output fails without calling the API.
 * - An empty output fails without calling the API, because the embeddings
 *   endpoint rejects empty input.
 * - Object outputs are serialized with `JSON.stringify` before embedding;
 *   `String(obj)` would embed the meaningless text "[object Object]".
 *
 * The client is created lazily on the first scored output, so building the
 * evaluator does not require an API key; scoring a non-empty output does.
 *
 * @param config Expected text and threshold, plus optional `model` and `name`
 * @returns An evaluator whose scorer resolves to `{ value, explanation }`
 *
 * @example
 * ```typescript
 * const evaluator = cosineSimilarityThresholdEvaluator({
 *   expectedText: "The capital of France is Paris",
 *   threshold: 0.8
 * });
 * ```
 */
export function cosineSimilarityThresholdEvaluator(config) {
  const { expectedText, threshold, model: userModel, name = "cosine-similarity-threshold", } = config;

  // Lazy initialization of the OpenAI client.
  let openaiClient = null;
  const getClient = () => {
    if (!openaiClient) {
      openaiClient = createOpenAIClient();
    }
    return openaiClient;
  };

  // An explicit model wins; otherwise the default depends on whether the Orq
  // proxy is in use, since the proxy default carries an `openai/` prefix.
  const getModel = () => {
    if (userModel) {
      return userModel;
    }
    const isOrq = !!process.env.ORQ_API_KEY;
    return isOrq ? "openai/text-embedding-3-small" : "text-embedding-3-small";
  };

  // Converts the output to the text that will be embedded.
  const toText = (value) => {
    if (typeof value !== "object") {
      return String(value);
    }
    try {
      const json = JSON.stringify(value);
      return typeof json === "string" ? json : String(value);
    } catch {
      // Circular structures or BigInt members cannot be serialized.
      return String(value);
    }
  };

  return {
    name,
    scorer: async ({ output }) => {
      if (output === undefined || output === null) {
        return {
          value: false,
          explanation: "Output is null or undefined",
        };
      }

      const outputText = toText(output);
      if (outputText === "") {
        return {
          value: false,
          explanation: "Output is empty",
        };
      }

      const client = getClient(); // Throws if no API key is configured
      const model = getModel();

      // Request both embeddings concurrently.
      const [outputEmbedding, expectedEmbedding] = await Promise.all([
        client.embeddings.create({ input: outputText, model }),
        client.embeddings.create({ input: expectedText, model }),
      ]);

      const outputVector = outputEmbedding.data[0].embedding;
      const expectedVector = expectedEmbedding.data[0].embedding;
      const similarity = cosineSimilarity(outputVector, expectedVector);
      const meetsThreshold = similarity >= threshold;
      return {
        value: meetsThreshold,
        explanation: meetsThreshold
          ? `Similarity (${similarity.toFixed(3)}) meets threshold (${threshold})`
          : `Similarity (${similarity.toFixed(3)}) below threshold (${threshold})`,
      };
    },
  };
}
|
|
155
|
+
/**
 * Shorthand for `cosineSimilarityEvaluator` that only takes the expected text
 * and leaves the model and evaluator name at their defaults.
 *
 * @param expectedText The expected text to compare against
 * @returns An evaluator that returns the raw cosine similarity score
 */
export function simpleCosineSimilarity(expectedText) {
  const config = { expectedText };
  return cosineSimilarityEvaluator(config);
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluators.d.ts","sourceRoot":"","sources":["../../src/lib/evaluators.ts"],"names":[],"mappings":"AAAA,wBAAgB,UAAU,IAAI,MAAM,CAEnC"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { Evaluator } from "@orq-ai/evaluatorq";
|
|
2
|
+
/**
|
|
3
|
+
* Validates that the output is valid JSON
|
|
4
|
+
*/
|
|
5
|
+
export declare const isValidJson: Evaluator;
|
|
6
|
+
/**
|
|
7
|
+
* Validates that the output contains specific JSON fields
|
|
8
|
+
*/
|
|
9
|
+
export declare function hasJsonFields(requiredFields: string[]): Evaluator;
|
|
10
|
+
/**
|
|
11
|
+
* Validates JSON schema compliance
|
|
12
|
+
*/
|
|
13
|
+
export declare function matchesJsonStructure(validator: (obj: any) => {
|
|
14
|
+
valid: boolean;
|
|
15
|
+
message?: string;
|
|
16
|
+
}): Evaluator;
|
|
17
|
+
/**
|
|
18
|
+
* Validates that JSON array has a specific length
|
|
19
|
+
*/
|
|
20
|
+
export declare function jsonArrayLength(expectedLength: number): Evaluator;
|
|
21
|
+
/**
|
|
22
|
+
* Validates that JSON array contains specific number of items within a range
|
|
23
|
+
*/
|
|
24
|
+
export declare function jsonArrayLengthInRange(min: number, max: number): Evaluator;
|
|
25
|
+
//# sourceMappingURL=json-evaluators.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json-evaluators.d.ts","sourceRoot":"","sources":["../../src/lib/json-evaluators.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAEpD;;GAEG;AACH,eAAO,MAAM,WAAW,EAAE,SAwBzB,CAAC;AAEF;;GAEG;AACH,wBAAgB,aAAa,CAAC,cAAc,EAAE,MAAM,EAAE,GAAG,SAAS,CA2CjE;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CACnC,SAAS,EAAE,CAAC,GAAG,EAAE,GAAG,KAAK;IAAE,KAAK,EAAE,OAAO,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,GAC3D,SAAS,CA2BX;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,cAAc,EAAE,MAAM,GAAG,SAAS,CAqCjE;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,SAAS,CAqC1E"}
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
/**
 * Evaluator that passes when the output can be parsed as JSON.
 *
 * String outputs are parsed as-is. Any other value is first serialized with
 * `JSON.stringify` and then parsed back, so it passes whenever that round
 * trip does not throw. `null` and `undefined` outputs fail.
 */
export const isValidJson = {
  name: "is-valid-json",
  scorer: async ({ output }) => {
    if (output == null) {
      return {
        value: false,
        explanation: "Output is null or undefined",
      };
    }

    let parsesCleanly = true;
    try {
      const text = typeof output === "string" ? output : JSON.stringify(output);
      JSON.parse(text);
    } catch {
      parsesCleanly = false;
    }

    if (parsesCleanly) {
      return { value: true, explanation: "Output is valid JSON" };
    }
    return { value: false, explanation: "Output is not valid JSON" };
  },
};
|
|
29
|
+
/**
 * Creates an evaluator that passes when the output is a JSON object that has
 * every field in `requiredFields` as its own property.
 *
 * String outputs are parsed with `JSON.parse`; other values are inspected
 * directly. Arrays, primitives and a parsed `null` are reported as
 * "not a JSON object".
 *
 * Fields are checked with `hasOwnProperty` rather than the `in` operator:
 * `in` also matches inherited members, so names such as `toString` or
 * `constructor` would be reported as present on an empty object.
 *
 * @param requiredFields Top-level property names that must be present
 * @returns An evaluator named `has-fields-<fields>`; the field part is the
 *   names joined with "-" and cut to 30 characters
 */
export function hasJsonFields(requiredFields) {
  const fieldList = requiredFields.join("-");
  return {
    name: `has-fields-${fieldList.slice(0, 30)}`,
    scorer: async ({ output }) => {
      if (output === undefined || output === null) {
        return {
          value: false,
          explanation: "Output is null or undefined",
        };
      }

      let obj;
      try {
        obj = typeof output === "string" ? JSON.parse(output) : output;
      } catch {
        return {
          value: false,
          explanation: "Output is not valid JSON",
        };
      }

      // typeof null is "object", so a parsed "null" needs its own check.
      if (obj === null || typeof obj !== "object" || Array.isArray(obj)) {
        return {
          value: false,
          explanation: "Output is not a JSON object",
        };
      }

      const missingFields = requiredFields.filter(
        (field) => !Object.prototype.hasOwnProperty.call(obj, field),
      );
      if (missingFields.length === 0) {
        return {
          value: true,
          explanation: `All required fields present: ${requiredFields.join(", ")}`,
        };
      }
      return {
        value: false,
        explanation: `Missing fields: ${missingFields.join(", ")}`,
      };
    },
  };
}
|
|
72
|
+
/**
 * Creates an evaluator that parses the output as JSON (when it is a string)
 * and hands the result to a caller-supplied `validator`.
 *
 * The evaluator's value is `validator(obj).valid`; its explanation is the
 * validator's `message` when one is given, otherwise a generic valid/invalid
 * text. `null` and `undefined` outputs fail without calling the validator.
 *
 * Parse failures and validator failures are reported separately, so an
 * exception thrown inside the validator is not mislabelled as a JSON parse
 * error. Neither kind of failure rejects the scorer; both resolve to
 * `value: false`.
 *
 * @param validator Function returning `{ valid, message? }` for the parsed value
 * @returns An evaluator named `matches-json-structure`
 */
export function matchesJsonStructure(validator) {
  const messageOf = (error) =>
    error instanceof Error ? error.message : "Unknown error";

  return {
    name: "matches-json-structure",
    scorer: async ({ output }) => {
      if (output === undefined || output === null) {
        return {
          value: false,
          explanation: "Output is null or undefined",
        };
      }

      let obj;
      try {
        obj = typeof output === "string" ? JSON.parse(output) : output;
      } catch (error) {
        return {
          value: false,
          explanation: `Failed to parse JSON: ${messageOf(error)}`,
        };
      }

      try {
        const result = validator(obj);
        return {
          value: result.valid,
          explanation: result.message || (result.valid ? "JSON structure is valid" : "JSON structure is invalid"),
        };
      } catch (error) {
        // Covers a throwing validator and a validator that returns no result.
        return {
          value: false,
          explanation: `Validator threw an error: ${messageOf(error)}`,
        };
      }
    },
  };
}
|
|
102
|
+
/**
 * Creates an evaluator that passes when the output is a JSON array with
 * exactly `expectedLength` items.
 *
 * String outputs are parsed with `JSON.parse`; other values are inspected
 * directly. Missing output, unparsable text and non-array values all fail
 * with their own explanation.
 *
 * @param expectedLength Required number of array items
 * @returns An evaluator named `json-array-length-<expectedLength>`
 */
export function jsonArrayLength(expectedLength) {
  const fail = (explanation) => ({ value: false, explanation });

  return {
    name: `json-array-length-${expectedLength}`,
    scorer: async ({ output }) => {
      if (output == null) {
        return fail("Output is null or undefined");
      }

      let parsed;
      try {
        parsed = typeof output === "string" ? JSON.parse(output) : output;
      } catch {
        return fail("Output is not valid JSON");
      }

      if (!Array.isArray(parsed)) {
        return fail("Output is not a JSON array");
      }

      if (parsed.length === expectedLength) {
        return {
          value: true,
          explanation: `Array has expected length of ${expectedLength}`,
        };
      }
      return fail(`Array length ${parsed.length} does not match expected ${expectedLength}`);
    },
  };
}
|
|
140
|
+
/**
 * Creates an evaluator that passes when the output is a JSON array whose
 * item count lies within `[min, max]`, both bounds inclusive.
 *
 * String outputs are parsed with `JSON.parse`; other values are inspected
 * directly. Missing output, unparsable text and non-array values all fail
 * with their own explanation.
 *
 * @param min Smallest accepted item count
 * @param max Largest accepted item count
 * @returns An evaluator named `json-array-length-<min>-<max>`
 */
export function jsonArrayLengthInRange(min, max) {
  const evaluatorName = `json-array-length-${min}-${max}`;

  const score = async ({ output }) => {
    if (output == null) {
      return { value: false, explanation: "Output is null or undefined" };
    }

    let candidate;
    try {
      candidate = typeof output === "string" ? JSON.parse(output) : output;
    } catch {
      return { value: false, explanation: "Output is not valid JSON" };
    }

    if (!Array.isArray(candidate)) {
      return { value: false, explanation: "Output is not a JSON array" };
    }

    const count = candidate.length;
    const fits = count >= min && count <= max;
    const verdict = fits ? "is within range" : "is outside range";
    return {
      value: fits,
      explanation: `Array length ${count} ${verdict} [${min}, ${max}]`,
    };
  };

  return { name: evaluatorName, scorer: score };
}
|