@orq-ai/evaluators 1.0.0-12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,48 @@
1
+ # @orq-ai/evaluators
2
+
3
+ Reusable evaluators for AI evaluation frameworks. This package provides a collection of pre-built evaluators that can be imported and used in your `.eval` files.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npm install @orq-ai/evaluators
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ### Cosine Similarity Evaluator
14
+
15
+ Compare semantic similarity between output and expected text using OpenAI embeddings:
16
+
17
+ ```typescript
18
+ import {
19
+ cosineSimilarityEvaluator,
20
+ cosineSimilarityThresholdEvaluator,
21
+ simpleCosineSimilarity
22
+ } from "@orq-ai/evaluators";
23
+
24
+ // Simple usage - returns similarity score (0-1)
25
+ const evaluator = simpleCosineSimilarity("The capital of France is Paris");
26
+
27
+ // With threshold - returns boolean based on threshold
28
+ const thresholdEvaluator = cosineSimilarityThresholdEvaluator({
29
+ expectedText: "The capital of France is Paris",
30
+ threshold: 0.8,
31
+ name: "semantic-match"
32
+ });
33
+
34
+ // Advanced configuration
35
+ const customEvaluator = cosineSimilarityEvaluator({
36
+ expectedText: "Expected output text",
37
+ model: "text-embedding-3-large", // optional: custom embedding model
38
+ name: "custom-similarity"
39
+ });
40
+ ```
41
+
42
+ #### Environment Variables
43
+
44
+ The cosine similarity evaluators require one of the following environment variables (if both are set, `ORQ_API_KEY` takes precedence):
45
+ - `OPENAI_API_KEY` - For direct OpenAI API access
46
+ - `ORQ_API_KEY` - For Orq proxy access (automatically uses `https://api.orq.ai/v2/proxy`)
47
+
48
+ When using the Orq proxy, model names should be prefixed with `openai/` (e.g., `openai/text-embedding-3-small`, which is the default model when `ORQ_API_KEY` is set).
@@ -0,0 +1,3 @@
1
+ export * from "./lib/cosine-similarity-evaluator.js";
2
+ export * from "./lib/vector-utils.js";
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,sCAAsC,CAAC;AACrD,cAAc,uBAAuB,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,2 @@
1
+ export * from "./lib/cosine-similarity-evaluator.js";
2
+ export * from "./lib/vector-utils.js";
@@ -0,0 +1,26 @@
1
+ import type { Evaluator } from "@orq-ai/evaluatorq";
2
+ /**
3
+ * Validates exact equality between output and expected output
+ *
+ * Evaluator named "exact-match". The output and `data.expectedOutput` are
+ * compared through their JSON serialization. When `data.expectedOutput` is
+ * undefined the check is skipped and the evaluator reports `true`.
4
+ */
5
+ export declare const exactMatch: Evaluator;
6
+ /**
7
+ * Validates fuzzy equality (case-insensitive, trimmed strings)
+ *
+ * Evaluator named "fuzzy-match". Both the output and `data.expectedOutput`
+ * are converted to strings, trimmed and lower-cased before being compared.
+ * When `data.expectedOutput` is undefined the check is skipped and the
+ * evaluator reports `true`.
8
+ */
9
+ export declare const fuzzyMatch: Evaluator;
10
+ /**
11
+ * Validates that a numeric output is within a tolerance of the expected value
+ *
+ * The returned evaluator is named `within-tolerance-<tolerance>`. It passes
+ * when the absolute difference between the output and `data.expectedOutput`
+ * is less than or equal to `tolerance`, fails when either value is not a
+ * valid number, and reports `true` when `data.expectedOutput` is undefined.
+ *
+ * @param tolerance Maximum allowed absolute difference (inclusive)
12
+ */
13
+ export declare function withinTolerance(tolerance: number): Evaluator;
14
+ /**
15
+ * Validates that a numeric output is greater than a threshold
+ *
+ * The returned evaluator is named `greater-than-<threshold>`. The comparison
+ * is strict (`output > threshold`) and fails when the output is not a valid
+ * number. The expected output is not consulted.
+ *
+ * @param threshold Exclusive lower bound for the output
16
+ */
17
+ export declare function greaterThan(threshold: number): Evaluator;
18
+ /**
19
+ * Validates that a numeric output is less than a threshold
+ *
+ * The returned evaluator is named `less-than-<threshold>`. The comparison is
+ * strict (`output < threshold`) and fails when the output is not a valid
+ * number. The expected output is not consulted.
+ *
+ * @param threshold Exclusive upper bound for the output
20
+ */
21
+ export declare function lessThan(threshold: number): Evaluator;
22
+ /**
23
+ * Validates that a numeric output is within a range (inclusive)
+ *
+ * The returned evaluator is named `in-range-<min>-<max>`. It passes when
+ * `min <= output <= max` and fails when the output is not a valid number.
+ * The expected output is not consulted.
+ *
+ * @param min Inclusive lower bound
+ * @param max Inclusive upper bound
24
+ */
25
+ export declare function inRange(min: number, max: number): Evaluator;
26
+ //# sourceMappingURL=comparison-evaluators.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"comparison-evaluators.d.ts","sourceRoot":"","sources":["../../src/lib/comparison-evaluators.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAEpD;;GAEG;AACH,eAAO,MAAM,UAAU,EAAE,SAmBxB,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,UAAU,EAAE,SAqBxB,CAAC;AAEF;;GAEG;AACH,wBAAgB,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,SAAS,CAgC5D;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,SAAS,EAAE,MAAM,GAAG,SAAS,CAuBxD;AAED;;GAEG;AACH,wBAAgB,QAAQ,CAAC,SAAS,EAAE,MAAM,GAAG,SAAS,CAuBrD;AAED;;GAEG;AACH,wBAAgB,OAAO,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,SAAS,CAuB3D"}
@@ -0,0 +1,148 @@
1
/**
 * Serializes a value to JSON with the keys of every nested object sorted, so
 * that two structurally equal values always produce the same text regardless
 * of the order in which their properties were created.
 *
 * @param value Any value accepted by `JSON.stringify`.
 * @returns The canonical JSON text, or `undefined` when the value has no JSON
 * representation (for example `undefined` or a function).
 */
function toCanonicalJson(value) {
  const serialized = JSON.stringify(value);
  if (serialized === undefined) {
    return undefined;
  }
  // The reviver visits values bottom-up, so by the time an object is seen all
  // of its children have already been rebuilt with sorted keys.
  const normalized = JSON.parse(serialized, (_key, node) =>
    node !== null && typeof node === "object" && !Array.isArray(node)
      ? Object.fromEntries(
          Object.keys(node)
            .sort()
            .map((key) => [key, node[key]]),
        )
      : node,
  );
  return JSON.stringify(normalized);
}
/**
 * Validates exact equality between output and expected output.
 *
 * Both values are compared through their JSON representation. Object keys are
 * sorted before comparing, so property order does not affect the result;
 * array order still does. When `data.expectedOutput` is undefined the check is
 * skipped and the evaluator reports `true`.
 */
export const exactMatch = {
  name: "exact-match",
  scorer: async ({ output, data }) => {
    if (data.expectedOutput === undefined) {
      return {
        value: true,
        explanation: "No expected output provided, skipping validation",
      };
    }
    const matches =
      toCanonicalJson(output) === toCanonicalJson(data.expectedOutput);
    return {
      value: matches,
      explanation: matches
        ? "Output exactly matches expected"
        : "Output does not match expected",
    };
  },
};
22
/**
 * Validates fuzzy equality (case-insensitive, trimmed strings).
 *
 * Both the output and `data.expectedOutput` are converted to text, trimmed
 * and lower-cased before being compared. Objects and arrays are converted
 * with `JSON.stringify` so that their contents take part in the comparison;
 * every other value is converted with `String()`. When `data.expectedOutput`
 * is undefined the check is skipped and the evaluator reports `true`.
 */
export const fuzzyMatch = {
  name: "fuzzy-match",
  scorer: async ({ output, data }) => {
    if (data.expectedOutput === undefined) {
      return {
        value: true,
        explanation: "No expected output provided, skipping validation",
      };
    }
    const normalize = (value) => {
      let text;
      if (value !== null && typeof value === "object") {
        // String() renders every plain object as "[object Object]", which
        // would make any two objects compare equal.
        try {
          text = JSON.stringify(value);
        } catch {
          // Not serializable (e.g. circular); fall back to String() below.
          text = undefined;
        }
      }
      if (typeof text !== "string") {
        text = String(value);
      }
      return text.trim().toLowerCase();
    };
    const matches = normalize(output) === normalize(data.expectedOutput);
    return {
      value: matches,
      explanation: matches
        ? "Output matches expected (case-insensitive)"
        : "Output does not match expected",
    };
  },
};
45
/**
 * Validates that a numeric output is within a tolerance of the expected value.
 *
 * Only numbers, bigints and non-blank strings are treated as numeric input.
 * Anything else (null, booleans, arrays, blank strings, ...) is reported as
 * "not a valid number" instead of being coerced to 0 or 1. When
 * `data.expectedOutput` is undefined the check is skipped and the evaluator
 * reports `true`.
 *
 * @param tolerance Maximum allowed absolute difference (inclusive).
 * @returns An evaluator named `within-tolerance-<tolerance>`.
 */
export function withinTolerance(tolerance) {
  // Number() alone would silently turn null, "", [] and false into 0.
  const parseNumeric = (raw) => {
    const kind = typeof raw;
    if (kind === "number" || kind === "bigint") {
      return Number(raw);
    }
    if (kind === "string" && raw.trim() !== "") {
      return Number(raw);
    }
    return Number.NaN;
  };
  return {
    name: `within-tolerance-${tolerance}`,
    scorer: async ({ output, data }) => {
      if (data.expectedOutput === undefined) {
        return {
          value: true,
          explanation: "No expected output provided, skipping validation",
        };
      }
      const outputNum = parseNumeric(output);
      const expectedNum = parseNumeric(data.expectedOutput);
      if (Number.isNaN(outputNum) || Number.isNaN(expectedNum)) {
        return {
          value: false,
          explanation: "Output or expected value is not a valid number",
        };
      }
      // Identical values differ by exactly 0; this also covers two equal
      // infinities, whose subtraction would otherwise yield NaN.
      const difference =
        outputNum === expectedNum ? 0 : Math.abs(outputNum - expectedNum);
      const isWithinTolerance = difference <= tolerance;
      return {
        value: isWithinTolerance,
        explanation: isWithinTolerance
          ? `Value ${outputNum} is within ${tolerance} of expected ${expectedNum}`
          : `Value ${outputNum} differs by ${difference} from expected ${expectedNum} (tolerance: ${tolerance})`,
      };
    },
  };
}
77
/**
 * Validates that a numeric output is greater than a threshold.
 *
 * Only numbers, bigints and non-blank strings are treated as numeric input.
 * Anything else (null, booleans, arrays, blank strings, ...) is reported as
 * "not a valid number" instead of being coerced to 0 or 1.
 *
 * @param threshold Exclusive lower bound for the output.
 * @returns An evaluator named `greater-than-<threshold>`.
 */
export function greaterThan(threshold) {
  // Number() alone would silently turn null, "", [] and false into 0, letting
  // a missing output pass against a negative threshold.
  const parseNumeric = (raw) => {
    const kind = typeof raw;
    if (kind === "number" || kind === "bigint") {
      return Number(raw);
    }
    if (kind === "string" && raw.trim() !== "") {
      return Number(raw);
    }
    return Number.NaN;
  };
  return {
    name: `greater-than-${threshold}`,
    scorer: async ({ output }) => {
      const value = parseNumeric(output);
      if (Number.isNaN(value)) {
        return {
          value: false,
          explanation: "Output is not a valid number",
        };
      }
      const isGreater = value > threshold;
      return {
        value: isGreater,
        explanation: isGreater
          ? `Value ${value} is greater than ${threshold}`
          : `Value ${value} is not greater than ${threshold}`,
      };
    },
  };
}
101
/**
 * Validates that a numeric output is less than a threshold.
 *
 * Only numbers, bigints and non-blank strings are treated as numeric input.
 * Anything else (null, booleans, arrays, blank strings, ...) is reported as
 * "not a valid number" instead of being coerced to 0 or 1.
 *
 * @param threshold Exclusive upper bound for the output.
 * @returns An evaluator named `less-than-<threshold>`.
 */
export function lessThan(threshold) {
  // Number() alone would silently turn null, "", [] and false into 0, letting
  // a missing output pass against a positive threshold.
  const parseNumeric = (raw) => {
    const kind = typeof raw;
    if (kind === "number" || kind === "bigint") {
      return Number(raw);
    }
    if (kind === "string" && raw.trim() !== "") {
      return Number(raw);
    }
    return Number.NaN;
  };
  return {
    name: `less-than-${threshold}`,
    scorer: async ({ output }) => {
      const value = parseNumeric(output);
      if (Number.isNaN(value)) {
        return {
          value: false,
          explanation: "Output is not a valid number",
        };
      }
      const isLess = value < threshold;
      return {
        value: isLess,
        explanation: isLess
          ? `Value ${value} is less than ${threshold}`
          : `Value ${value} is not less than ${threshold}`,
      };
    },
  };
}
125
/**
 * Validates that a numeric output is within a range (inclusive).
 *
 * Only numbers, bigints and non-blank strings are treated as numeric input.
 * Anything else (null, booleans, arrays, blank strings, ...) is reported as
 * "not a valid number" instead of being coerced to 0 or 1.
 *
 * @param min Inclusive lower bound.
 * @param max Inclusive upper bound.
 * @returns An evaluator named `in-range-<min>-<max>`.
 */
export function inRange(min, max) {
  // Number() alone would silently turn null, "", [] and false into 0, letting
  // a missing output pass whenever the range contains 0.
  const parseNumeric = (raw) => {
    const kind = typeof raw;
    if (kind === "number" || kind === "bigint") {
      return Number(raw);
    }
    if (kind === "string" && raw.trim() !== "") {
      return Number(raw);
    }
    return Number.NaN;
  };
  return {
    name: `in-range-${min}-${max}`,
    scorer: async ({ output }) => {
      const value = parseNumeric(output);
      if (Number.isNaN(value)) {
        return {
          value: false,
          explanation: "Output is not a valid number",
        };
      }
      const isInRange = value >= min && value <= max;
      return {
        value: isInRange,
        explanation: isInRange
          ? `Value ${value} is within range [${min}, ${max}]`
          : `Value ${value} is outside range [${min}, ${max}]`,
      };
    },
  };
}
@@ -0,0 +1,62 @@
1
+ import type { Evaluator } from "@orq-ai/evaluatorq";
2
+ /**
3
+ * Configuration options for the cosine similarity evaluator
+ * (the argument accepted by `cosineSimilarityEvaluator`).
4
+ */
5
+ export interface CosineSimilarityConfig {
6
+ /**
7
+ * The expected text to compare against the output
8
+ */
9
+ expectedText: string;
10
+ /**
11
+ * The embedding model to use
+ * When omitted, the default is picked at evaluation time depending on
+ * whether the `ORQ_API_KEY` environment variable is set.
12
+ * @default "text-embedding-3-small" for OpenAI, "openai/text-embedding-3-small" for Orq
13
+ */
14
+ model?: string;
15
+ /**
16
+ * Optional name for the evaluator
17
+ * @default "cosine-similarity"
18
+ */
19
+ name?: string;
20
+ }
21
+ /**
22
+ * Configuration options for the cosine similarity threshold evaluator
+ * (the argument accepted by `cosineSimilarityThresholdEvaluator`).
+ * For that evaluator an omitted `name` defaults to
+ * "cosine-similarity-threshold".
23
+ */
24
+ export interface CosineSimilarityThresholdConfig extends CosineSimilarityConfig {
25
+ /**
26
+ * Threshold for similarity score (0-1)
27
+ * The evaluator will return true if similarity meets the threshold
+ * (that is, when similarity >= threshold)
28
+ */
29
+ threshold: number;
30
+ }
31
+ /**
32
+ * Creates a cosine similarity evaluator that returns the raw similarity score
33
+ * between the output and expected text using OpenAI embeddings
34
+ *
+ * A null or undefined output scores 0 without any API call. The OpenAI
+ * client is created lazily on the first evaluation that needs it; that
+ * evaluation throws if neither `ORQ_API_KEY` nor `OPENAI_API_KEY` is set.
+ *
35
+ * @example
36
+ * ```typescript
37
+ * const evaluator = cosineSimilarityEvaluator({
38
+ * expectedText: "The capital of France is Paris"
39
+ * });
40
+ * ```
41
+ */
42
+ export declare function cosineSimilarityEvaluator(config: CosineSimilarityConfig): Evaluator;
43
+ /**
44
+ * Creates a cosine similarity evaluator that returns a boolean based on
45
+ * whether the similarity meets a threshold
46
+ *
+ * The result is true when similarity >= threshold. A null or undefined
+ * output yields false without any API call. The OpenAI client is created
+ * lazily on the first evaluation that needs it; that evaluation throws if
+ * neither `ORQ_API_KEY` nor `OPENAI_API_KEY` is set.
+ *
47
+ * @example
48
+ * ```typescript
49
+ * const evaluator = cosineSimilarityThresholdEvaluator({
50
+ * expectedText: "The capital of France is Paris",
51
+ * threshold: 0.8
52
+ * });
53
+ * ```
54
+ */
55
+ export declare function cosineSimilarityThresholdEvaluator(config: CosineSimilarityThresholdConfig): Evaluator;
56
+ /**
57
+ * Creates a simple cosine similarity evaluator with default settings
+ * Shorthand for `cosineSimilarityEvaluator({ expectedText })`, so the default
+ * model and the default evaluator name apply.
58
+ * @param expectedText The expected text to compare against
59
+ * @returns An evaluator that returns the cosine similarity score (0-1)
60
+ */
61
+ export declare function simpleCosineSimilarity(expectedText: string): Evaluator;
62
+ //# sourceMappingURL=cosine-similarity-evaluator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cosine-similarity-evaluator.d.ts","sourceRoot":"","sources":["../../src/lib/cosine-similarity-evaluator.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAgCpD;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC;;OAEG;IACH,YAAY,EAAE,MAAM,CAAC;IACrB;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED;;GAEG;AACH,MAAM,WAAW,+BACf,SAAQ,sBAAsB;IAC9B;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,yBAAyB,CACvC,MAAM,EAAE,sBAAsB,GAC7B,SAAS,CA2DX;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,kCAAkC,CAChD,MAAM,EAAE,+BAA+B,GACtC,SAAS,CAmEX;AAED;;;;GAIG;AACH,wBAAgB,sBAAsB,CAAC,YAAY,EAAE,MAAM,GAAG,SAAS,CAEtE"}
@@ -0,0 +1,162 @@
1
+ import OpenAI from "openai";
2
+ import { cosineSimilarity } from "./vector-utils.js";
3
/**
 * Builds the OpenAI client used for embeddings.
 *
 * `ORQ_API_KEY` wins when present and routes requests through the Orq proxy;
 * otherwise `OPENAI_API_KEY` is used against the default OpenAI endpoint.
 *
 * @throws {Error} If neither ORQ_API_KEY nor OPENAI_API_KEY is defined
 */
function createOpenAIClient() {
  const { ORQ_API_KEY: orqKey, OPENAI_API_KEY: openaiKey } = process.env;
  if (!orqKey && !openaiKey) {
    throw new Error("Cosine similarity evaluator requires either ORQ_API_KEY or OPENAI_API_KEY environment variable to be set for embeddings");
  }
  // Proxy settings only apply on the Orq path; direct access needs the key alone.
  const clientOptions = orqKey
    ? { baseURL: "https://api.orq.ai/v2/proxy", apiKey: orqKey }
    : { apiKey: openaiKey };
  return new OpenAI(clientOptions);
}
25
/**
 * Creates a cosine similarity evaluator that returns the raw similarity score
 * between the output and expected text using OpenAI embeddings.
 *
 * Behaviour of the returned scorer:
 * - a null or undefined output scores 0 without any API call;
 * - an output whose text form is empty scores 0 without any API call, because
 *   the embeddings endpoint rejects empty input;
 * - object outputs are embedded as their JSON text rather than as
 *   "[object Object]";
 * - the OpenAI client is created lazily, so a missing API key only throws
 *   once an evaluation actually needs embeddings.
 *
 * @param config Expected text plus optional embedding model and evaluator name.
 * @returns An evaluator whose value is the cosine similarity of both embeddings.
 * @throws {Error} At evaluation time, if no API key is configured or the
 * embeddings response carries no vector.
 *
 * @example
 * ```typescript
 * const evaluator = cosineSimilarityEvaluator({
 *   expectedText: "The capital of France is Paris"
 * });
 * ```
 */
export function cosineSimilarityEvaluator(config) {
  const { expectedText, model: userModel, name = "cosine-similarity" } = config;
  // Lazy initialization of OpenAI client
  let openaiClient = null;
  const getClient = () => {
    if (!openaiClient) {
      openaiClient = createOpenAIClient();
    }
    return openaiClient;
  };
  // Determine the appropriate model based on the environment
  const getModel = () => {
    if (userModel) {
      return userModel;
    }
    const isOrq = !!process.env.ORQ_API_KEY;
    return isOrq ? "openai/text-embedding-3-small" : "text-embedding-3-small";
  };
  // Objects are serialized as JSON; String() would embed "[object Object]".
  const toText = (value) => {
    if (typeof value === "object") {
      try {
        const json = JSON.stringify(value);
        if (typeof json === "string") {
          return json;
        }
      } catch {
        // Not serializable (e.g. circular); fall back to String() below.
      }
    }
    return String(value);
  };
  // Requests one embedding and fails with a descriptive error when the
  // response does not carry a vector.
  const embed = async (client, model, input) => {
    const response = await client.embeddings.create({ input, model });
    const first = response && Array.isArray(response.data) ? response.data[0] : undefined;
    if (!first || first.embedding === undefined || first.embedding === null) {
      throw new Error(`Embedding response for model "${model}" did not contain an embedding vector`);
    }
    return first.embedding;
  };
  return {
    name,
    scorer: async ({ output }) => {
      if (output === undefined || output === null) {
        return {
          value: 0,
          explanation: "Output is null or undefined",
        };
      }
      const outputText = toText(output);
      if (outputText.length === 0) {
        return {
          value: 0,
          explanation: "Output is empty",
        };
      }
      const client = getClient(); // This will throw if no API keys
      const model = getModel();
      // Get embeddings for both texts in parallel
      const [outputVector, expectedVector] = await Promise.all([
        embed(client, model, outputText),
        embed(client, model, expectedText),
      ]);
      // Calculate cosine similarity
      const similarity = cosineSimilarity(outputVector, expectedVector);
      return {
        value: similarity,
        explanation: `Cosine similarity: ${similarity.toFixed(3)}`,
      };
    },
  };
}
88
/**
 * Creates a cosine similarity evaluator that returns a boolean based on
 * whether the similarity meets a threshold (similarity >= threshold).
 *
 * Behaviour of the returned scorer:
 * - a null or undefined output yields false without any API call;
 * - an output whose text form is empty yields false without any API call,
 *   because the embeddings endpoint rejects empty input;
 * - object outputs are embedded as their JSON text rather than as
 *   "[object Object]";
 * - the OpenAI client is created lazily, so a missing API key only throws
 *   once an evaluation actually needs embeddings.
 *
 * @param config Expected text and threshold plus optional embedding model and
 * evaluator name.
 * @returns An evaluator whose value is true when the threshold is met.
 * @throws {Error} At evaluation time, if no API key is configured or the
 * embeddings response carries no vector.
 *
 * @example
 * ```typescript
 * const evaluator = cosineSimilarityThresholdEvaluator({
 *   expectedText: "The capital of France is Paris",
 *   threshold: 0.8
 * });
 * ```
 */
export function cosineSimilarityThresholdEvaluator(config) {
  const { expectedText, threshold, model: userModel, name = "cosine-similarity-threshold", } = config;
  // Lazy initialization of OpenAI client
  let openaiClient = null;
  const getClient = () => {
    if (!openaiClient) {
      openaiClient = createOpenAIClient();
    }
    return openaiClient;
  };
  // Determine the appropriate model based on the environment
  const getModel = () => {
    if (userModel) {
      return userModel;
    }
    const isOrq = !!process.env.ORQ_API_KEY;
    return isOrq ? "openai/text-embedding-3-small" : "text-embedding-3-small";
  };
  // Objects are serialized as JSON; String() would embed "[object Object]".
  const toText = (value) => {
    if (typeof value === "object") {
      try {
        const json = JSON.stringify(value);
        if (typeof json === "string") {
          return json;
        }
      } catch {
        // Not serializable (e.g. circular); fall back to String() below.
      }
    }
    return String(value);
  };
  // Requests one embedding and fails with a descriptive error when the
  // response does not carry a vector.
  const embed = async (client, model, input) => {
    const response = await client.embeddings.create({ input, model });
    const first = response && Array.isArray(response.data) ? response.data[0] : undefined;
    if (!first || first.embedding === undefined || first.embedding === null) {
      throw new Error(`Embedding response for model "${model}" did not contain an embedding vector`);
    }
    return first.embedding;
  };
  return {
    name,
    scorer: async ({ output }) => {
      if (output === undefined || output === null) {
        return {
          value: false,
          explanation: "Output is null or undefined",
        };
      }
      const outputText = toText(output);
      if (outputText.length === 0) {
        return {
          value: false,
          explanation: "Output is empty",
        };
      }
      const client = getClient(); // This will throw if no API keys
      const model = getModel();
      // Get embeddings for both texts in parallel
      const [outputVector, expectedVector] = await Promise.all([
        embed(client, model, outputText),
        embed(client, model, expectedText),
      ]);
      // Calculate cosine similarity
      const similarity = cosineSimilarity(outputVector, expectedVector);
      const meetsThreshold = similarity >= threshold;
      return {
        value: meetsThreshold,
        explanation: meetsThreshold
          ? `Similarity (${similarity.toFixed(3)}) meets threshold (${threshold})`
          : `Similarity (${similarity.toFixed(3)}) below threshold (${threshold})`,
      };
    },
  };
}
155
/**
 * Convenience factory: a cosine similarity evaluator that uses every default
 * (model and evaluator name) and only needs the reference text.
 *
 * @param expectedText The expected text to compare against
 * @returns The evaluator produced by `cosineSimilarityEvaluator` for that text
 */
export function simpleCosineSimilarity(expectedText) {
  const defaultConfig = { expectedText };
  return cosineSimilarityEvaluator(defaultConfig);
}
@@ -0,0 +1,2 @@
1
+ /** Returns the constant string "evaluators". */ export declare function evaluators(): string;
2
+ //# sourceMappingURL=evaluators.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluators.d.ts","sourceRoot":"","sources":["../../src/lib/evaluators.ts"],"names":[],"mappings":"AAAA,wBAAgB,UAAU,IAAI,MAAM,CAEnC"}
@@ -0,0 +1,3 @@
1
/**
 * Returns the constant string "evaluators".
 */
export function evaluators() {
  const label = "evaluators";
  return label;
}
@@ -0,0 +1,25 @@
1
+ import type { Evaluator } from "@orq-ai/evaluatorq";
2
+ /**
3
+ * Validates that the output is valid JSON
+ *
+ * Evaluator named "is-valid-json". String outputs are parsed with
+ * `JSON.parse`; other values are first serialized with `JSON.stringify` and
+ * then parsed. A null or undefined output fails.
4
+ */
5
+ export declare const isValidJson: Evaluator;
6
+ /**
7
+ * Validates that the output contains specific JSON fields
+ *
+ * The evaluator name is "has-fields-" followed by the field names joined
+ * with "-" and truncated to 30 characters. String outputs are parsed as
+ * JSON first. The check fails when the output is null or undefined, is not
+ * valid JSON, is not a JSON object (arrays are rejected), or lacks any of
+ * the required fields; the explanation lists the missing fields.
+ *
+ * @param requiredFields Names of the top-level fields that must be present
8
+ */
9
+ export declare function hasJsonFields(requiredFields: string[]): Evaluator;
10
+ /**
11
+ * Validates the output with a caller-supplied structure validator
+ *
+ * No JSON Schema engine is involved: the evaluator, named
+ * "matches-json-structure", parses string outputs as JSON and hands the
+ * resulting value to `validator`. The validator's `valid` flag becomes the
+ * evaluator value and its optional `message` becomes the explanation (a
+ * generic explanation is used when no message is returned). A null or
+ * undefined output fails.
+ *
+ * @param validator Function that inspects the parsed value and reports
+ * whether it is valid, optionally with a message
12
+ */
13
+ export declare function matchesJsonStructure(validator: (obj: any) => {
14
+ valid: boolean;
15
+ message?: string;
16
+ }): Evaluator;
17
+ /**
18
+ * Validates that JSON array has a specific length
+ *
+ * The returned evaluator is named `json-array-length-<expectedLength>`.
+ * String outputs are parsed as JSON first. The check fails when the output
+ * is null or undefined, is not valid JSON, is not an array, or has a
+ * different length.
+ *
+ * @param expectedLength Exact number of items the array must contain
19
+ */
20
+ export declare function jsonArrayLength(expectedLength: number): Evaluator;
21
+ /**
22
+ * Validates that JSON array contains specific number of items within a range
+ *
+ * The returned evaluator is named `json-array-length-<min>-<max>`. String
+ * outputs are parsed as JSON first. The check passes when
+ * `min <= length <= max` and fails when the output is null or undefined, is
+ * not valid JSON, or is not an array.
+ *
+ * @param min Inclusive minimum number of items
+ * @param max Inclusive maximum number of items
23
+ */
24
+ export declare function jsonArrayLengthInRange(min: number, max: number): Evaluator;
25
+ //# sourceMappingURL=json-evaluators.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"json-evaluators.d.ts","sourceRoot":"","sources":["../../src/lib/json-evaluators.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAEpD;;GAEG;AACH,eAAO,MAAM,WAAW,EAAE,SAwBzB,CAAC;AAEF;;GAEG;AACH,wBAAgB,aAAa,CAAC,cAAc,EAAE,MAAM,EAAE,GAAG,SAAS,CA2CjE;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CACnC,SAAS,EAAE,CAAC,GAAG,EAAE,GAAG,KAAK;IAAE,KAAK,EAAE,OAAO,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAE,GAC3D,SAAS,CA2BX;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,cAAc,EAAE,MAAM,GAAG,SAAS,CAqCjE;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,SAAS,CAqC1E"}
@@ -0,0 +1,177 @@
1
/**
 * Evaluator that checks whether the output can be read as JSON.
 *
 * Strings are parsed directly; any other value is serialized with
 * `JSON.stringify` and the result is parsed. A null or undefined output fails.
 */
export const isValidJson = {
  name: "is-valid-json",
  scorer: async ({ output }) => {
    if (output === null || output === undefined) {
      return { value: false, explanation: "Output is null or undefined" };
    }
    let parses = true;
    try {
      const jsonText = typeof output === "string" ? output : JSON.stringify(output);
      JSON.parse(jsonText);
    } catch {
      // Either serialization or parsing failed; both count as invalid JSON.
      parses = false;
    }
    if (parses) {
      return { value: true, explanation: "Output is valid JSON" };
    }
    return { value: false, explanation: "Output is not valid JSON" };
  },
};
29
/**
 * Validates that the output contains specific JSON fields.
 *
 * String outputs are parsed as JSON first. Only the object's own properties
 * count, so inherited members such as `toString` or `constructor` are not
 * mistaken for fields. The check fails when the output is null or undefined,
 * is not valid JSON, is not a JSON object (arrays and a parsed `null` are
 * rejected), or lacks any required field.
 *
 * @param requiredFields Names of the top-level fields that must be present.
 * @returns An evaluator named "has-fields-" plus the field names joined with
 * "-" and truncated to 30 characters.
 */
export function hasJsonFields(requiredFields) {
  const fieldList = requiredFields.join("-");
  return {
    name: `has-fields-${fieldList.slice(0, 30)}`,
    scorer: async ({ output }) => {
      if (output === undefined || output === null) {
        return {
          value: false,
          explanation: "Output is null or undefined",
        };
      }
      let obj;
      try {
        obj = typeof output === "string" ? JSON.parse(output) : output;
      } catch {
        return {
          value: false,
          explanation: "Output is not valid JSON",
        };
      }
      // typeof null is "object", so a parsed "null" needs its own check.
      if (obj === null || typeof obj !== "object" || Array.isArray(obj)) {
        return {
          value: false,
          explanation: "Output is not a JSON object",
        };
      }
      // Own-property test: the `in` operator would also accept anything
      // inherited from Object.prototype.
      const missingFields = requiredFields.filter(
        (field) => !Object.prototype.hasOwnProperty.call(obj, field),
      );
      if (missingFields.length === 0) {
        return {
          value: true,
          explanation: `All required fields present: ${requiredFields.join(", ")}`,
        };
      }
      return {
        value: false,
        explanation: `Missing fields: ${missingFields.join(", ")}`,
      };
    },
  };
}
72
/**
 * Validates the output with a caller-supplied structure validator.
 *
 * String outputs are parsed as JSON first and the resulting value is handed
 * to `validator`. Parse failures and validator failures are reported with
 * distinct explanations so that a bug in the validator is not mistaken for
 * malformed JSON. The scorer never rejects: every failure becomes a `false`
 * result.
 *
 * @param validator Function that inspects the parsed value and returns
 * `{ valid, message? }`; `message`, when present, becomes the explanation.
 * @returns An evaluator named "matches-json-structure".
 */
export function matchesJsonStructure(validator) {
  const describeError = (error) =>
    error instanceof Error ? error.message : "Unknown error";
  return {
    name: "matches-json-structure",
    scorer: async ({ output }) => {
      if (output === undefined || output === null) {
        return {
          value: false,
          explanation: "Output is null or undefined",
        };
      }
      let obj;
      try {
        obj = typeof output === "string" ? JSON.parse(output) : output;
      } catch (error) {
        return {
          value: false,
          explanation: `Failed to parse JSON: ${describeError(error)}`,
        };
      }
      try {
        const result = validator(obj);
        return {
          value: result.valid,
          explanation: result.message || (result.valid ? "JSON structure is valid" : "JSON structure is invalid"),
        };
      } catch (error) {
        // Covers both a throwing validator and one that returns nothing usable.
        return {
          value: false,
          explanation: `Validator threw an error: ${describeError(error)}`,
        };
      }
    },
  };
}
102
/**
 * Builds an evaluator that checks a JSON array for an exact item count.
 *
 * String outputs are parsed as JSON first. The check fails for null or
 * undefined outputs, for text that is not valid JSON and for values that are
 * not arrays.
 *
 * @param expectedLength Exact number of items the array must contain.
 * @returns An evaluator named `json-array-length-<expectedLength>`.
 */
export function jsonArrayLength(expectedLength) {
  const reject = (explanation) => ({ value: false, explanation });
  const scorer = async ({ output }) => {
    if (output === null || output === undefined) {
      return reject("Output is null or undefined");
    }
    try {
      const candidate = typeof output === "string" ? JSON.parse(output) : output;
      if (!Array.isArray(candidate)) {
        return reject("Output is not a JSON array");
      }
      const actualLength = candidate.length;
      if (actualLength === expectedLength) {
        return {
          value: true,
          explanation: `Array has expected length of ${expectedLength}`,
        };
      }
      return reject(`Array length ${actualLength} does not match expected ${expectedLength}`);
    } catch {
      return reject("Output is not valid JSON");
    }
  };
  return { name: `json-array-length-${expectedLength}`, scorer };
}
140
/**
 * Builds an evaluator that checks a JSON array's item count against an
 * inclusive range.
 *
 * String outputs are parsed as JSON first. The check fails for null or
 * undefined outputs, for text that is not valid JSON and for values that are
 * not arrays.
 *
 * @param min Inclusive minimum number of items.
 * @param max Inclusive maximum number of items.
 * @returns An evaluator named `json-array-length-<min>-<max>`.
 */
export function jsonArrayLengthInRange(min, max) {
  const evaluatorName = `json-array-length-${min}-${max}`;
  const bounds = `[${min}, ${max}]`;
  return {
    name: evaluatorName,
    scorer: async ({ output }) => {
      if (output === null || output === undefined) {
        return { value: false, explanation: "Output is null or undefined" };
      }
      try {
        const items = typeof output === "string" ? JSON.parse(output) : output;
        if (!Array.isArray(items)) {
          return { value: false, explanation: "Output is not a JSON array" };
        }
        const count = items.length;
        const tooShort = count < min;
        const tooLong = count > max;
        if (tooShort || tooLong) {
          return {
            value: false,
            explanation: `Array length ${count} is outside range ${bounds}`,
          };
        }
        return {
          value: true,
          explanation: `Array length ${count} is within range ${bounds}`,
        };
      } catch {
        return { value: false, explanation: "Output is not valid JSON" };
      }
    },
  };
}