qualifire 1.3.0 → 1.4.0
This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +206 -49
- package/lib/index.d.ts +2 -2
- package/lib/index.js +25 -13
- package/lib/types.d.ts +19 -7
- package/lib/types.js +17 -7
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
Qualifire
|
|
1
|
+
# Qualifire SDK
|
|
2
2
|
|
|
3
3
|
[CodeQL analysis workflow](https://github.com/qualifire-dev/qualifire-typescript-sdk/actions/workflows/codeql-analysis.yml)
|
|
4
4
|
[Release workflow](https://github.com/qualifire-dev/qualifire-typescript-sdk/actions/workflows/release.yml)
|
|
@@ -7,9 +7,7 @@ Qualifire
|
|
|
7
7
|
[![Commitizen Friendly][commitizen-img]][commitizen-url]
|
|
8
8
|
[![Semantic Release][semantic-release-img]][semantic-release-url]
|
|
9
9
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
This is the official SDK for interacting with the Qualifire API.
|
|
10
|
+
The official TypeScript SDK for evaluating LLM outputs with [Qualifire](https://qualifire.ai). Detect hallucinations, prompt injections, PII leakage, content policy violations, and more.
|
|
13
11
|
|
|
14
12
|
## Installation
|
|
15
13
|
|
|
@@ -17,79 +15,238 @@ This is the official SDK for interacting with the Qualifire API.
|
|
|
17
15
|
npm install qualifire
|
|
18
16
|
```
|
|
19
17
|
|
|
20
|
-
##
|
|
18
|
+
## Quick Start
|
|
19
|
+
|
|
20
|
+
```typescript
|
|
21
|
+
import { Qualifire } from 'qualifire';
|
|
22
|
+
import OpenAI from 'openai';
|
|
23
|
+
|
|
24
|
+
const qualifire = new Qualifire({ apiKey: 'your-api-key' });
|
|
25
|
+
const openai = new OpenAI();
|
|
26
|
+
|
|
27
|
+
// Make your LLM call
|
|
28
|
+
const request = {
|
|
29
|
+
model: 'gpt-4o',
|
|
30
|
+
messages: [
|
|
31
|
+
{ role: 'system', content: 'You are a helpful assistant.' },
|
|
32
|
+
{ role: 'user', content: 'What is the capital of France?' },
|
|
33
|
+
],
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
const response = await openai.chat.completions.create(request);
|
|
21
37
|
|
|
22
|
-
|
|
38
|
+
// Evaluate the response
|
|
39
|
+
const evaluation = await qualifire.evaluate({
|
|
40
|
+
framework: 'openai',
|
|
41
|
+
request,
|
|
42
|
+
response,
|
|
43
|
+
hallucinationsCheck: true,
|
|
44
|
+
groundingCheck: true,
|
|
45
|
+
});
|
|
23
46
|
|
|
24
|
-
|
|
25
|
-
|
|
47
|
+
console.log(evaluation);
|
|
48
|
+
// {
|
|
49
|
+
// status: 'passed',
|
|
50
|
+
// score: 100,
|
|
51
|
+
// evaluationResults: [...]
|
|
52
|
+
// }
|
|
26
53
|
```
|
|
27
54
|
|
|
28
|
-
|
|
55
|
+
## Supported Frameworks
|
|
29
56
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
57
|
+
| Framework | Value | SDK |
|
|
58
|
+
|-----------|-------|-----|
|
|
59
|
+
| OpenAI | `openai` | `openai` (Chat Completions & Responses API) |
|
|
60
|
+
| Anthropic Claude | `claude` | `@anthropic-ai/sdk` |
|
|
61
|
+
| Google Gemini | `gemini` | `@google/genai` |
|
|
62
|
+
| Vercel AI SDK | `vercelai` | `ai` |
|
|
63
|
+
|
|
64
|
+
All frameworks support both streaming and non-streaming responses.
|
|
65
|
+
|
|
66
|
+
## Available Evaluation Checks
|
|
67
|
+
|
|
68
|
+
| Check | Parameter | Description |
|
|
69
|
+
|-------|-----------|-------------|
|
|
70
|
+
| Hallucinations | `hallucinationsCheck` | Detect fabricated information |
|
|
71
|
+
| Grounding | `groundingCheck` | Verify responses are grounded in context |
|
|
72
|
+
| Prompt Injections | `promptInjections` | Detect prompt injection attempts |
|
|
73
|
+
| PII Detection | `piiCheck` | Identify personally identifiable information |
|
|
74
|
+
| Content Moderation | `contentModerationCheck` | Flag harmful content |
|
|
75
|
+
| Instructions Following | `instructionsFollowingCheck` | Verify adherence to system instructions |
|
|
76
|
+
| Tool Use Quality | `toolUseQualityCheck` | Evaluate tool/function call accuracy (replaces the deprecated `toolSelectionQualityCheck`) |
|
|
77
|
+
| Custom Assertions | `assertions` | Array of custom assertion strings |
|
|
78
|
+
|
|
79
|
+
## Framework Examples
|
|
80
|
+
|
|
81
|
+
### OpenAI
|
|
82
|
+
|
|
83
|
+
```typescript
|
|
84
|
+
// Chat Completions API
|
|
85
|
+
const request = {
|
|
86
|
+
model: 'gpt-4o',
|
|
87
|
+
messages: [{ role: 'user', content: 'Hello!' }],
|
|
88
|
+
};
|
|
89
|
+
const response = await openai.chat.completions.create(request);
|
|
90
|
+
|
|
91
|
+
await qualifire.evaluate({
|
|
92
|
+
framework: 'openai',
|
|
93
|
+
request,
|
|
94
|
+
response,
|
|
95
|
+
hallucinationsCheck: true,
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
// Streaming
|
|
99
|
+
const streamRequest = { ...request, stream: true };
|
|
100
|
+
const stream = await openai.chat.completions.create(streamRequest);
|
|
101
|
+
|
|
102
|
+
const chunks = [];
|
|
103
|
+
for await (const chunk of stream) {
|
|
104
|
+
chunks.push(chunk);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
await qualifire.evaluate({
|
|
108
|
+
framework: 'openai',
|
|
109
|
+
request: streamRequest,
|
|
110
|
+
response: chunks,
|
|
111
|
+
hallucinationsCheck: true,
|
|
33
112
|
});
|
|
34
113
|
```
|
|
35
114
|
|
|
36
|
-
|
|
115
|
+
### Anthropic Claude
|
|
37
116
|
|
|
38
|
-
|
|
117
|
+
```typescript
|
|
118
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
39
119
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
content: 'this is my awesome request',
|
|
47
|
-
},
|
|
48
|
-
],
|
|
120
|
+
const anthropic = new Anthropic();
|
|
121
|
+
|
|
122
|
+
const request = {
|
|
123
|
+
model: 'claude-sonnet-4-20250514',
|
|
124
|
+
max_tokens: 1024,
|
|
125
|
+
messages: [{ role: 'user', content: 'Hello!' }],
|
|
49
126
|
};
|
|
127
|
+
const response = await anthropic.messages.create(request);
|
|
50
128
|
|
|
51
|
-
|
|
129
|
+
await qualifire.evaluate({
|
|
130
|
+
framework: 'claude',
|
|
131
|
+
request,
|
|
132
|
+
response,
|
|
133
|
+
promptInjections: true,
|
|
134
|
+
});
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Google Gemini
|
|
138
|
+
|
|
139
|
+
```typescript
|
|
140
|
+
import { GoogleGenAI } from '@google/genai';
|
|
141
|
+
|
|
142
|
+
const genai = new GoogleGenAI({ apiKey: 'your-key' });
|
|
143
|
+
|
|
144
|
+
const request = {
|
|
145
|
+
model: 'gemini-2.0-flash',
|
|
146
|
+
contents: [{ role: 'user', parts: [{ text: 'Hello!' }] }],
|
|
147
|
+
};
|
|
148
|
+
const response = await genai.models.generateContent(request);
|
|
149
|
+
|
|
150
|
+
await qualifire.evaluate({
|
|
151
|
+
framework: 'gemini',
|
|
152
|
+
request,
|
|
153
|
+
response,
|
|
154
|
+
contentModerationCheck: true,
|
|
155
|
+
});
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### Vercel AI SDK
|
|
159
|
+
|
|
160
|
+
```typescript
|
|
161
|
+
import { generateText } from 'ai';
|
|
162
|
+
import { openai } from '@ai-sdk/openai';
|
|
163
|
+
|
|
164
|
+
const request = {
|
|
165
|
+
model: openai('gpt-4o'),
|
|
166
|
+
prompt: 'Hello!',
|
|
167
|
+
};
|
|
168
|
+
const response = await generateText(request);
|
|
52
169
|
|
|
53
|
-
|
|
54
|
-
|
|
170
|
+
await qualifire.evaluate({
|
|
171
|
+
framework: 'vercelai',
|
|
172
|
+
request,
|
|
173
|
+
response,
|
|
174
|
+
piiCheck: true,
|
|
175
|
+
});
|
|
55
176
|
```
|
|
56
177
|
|
|
57
|
-
|
|
178
|
+
## Direct Message Mode
|
|
58
179
|
|
|
59
|
-
|
|
180
|
+
For cases where you don't use a supported framework, pass messages directly:
|
|
60
181
|
|
|
61
|
-
```
|
|
62
|
-
|
|
63
|
-
model: 'gpt-3.5-turbo',
|
|
182
|
+
```typescript
|
|
183
|
+
await qualifire.evaluate({
|
|
64
184
|
messages: [
|
|
65
|
-
{
|
|
66
|
-
|
|
67
|
-
content: 'this is my awesome request',
|
|
68
|
-
},
|
|
185
|
+
{ role: 'user', content: 'What is 2+2?' },
|
|
186
|
+
{ role: 'assistant', content: 'The answer is 4.' },
|
|
69
187
|
],
|
|
70
|
-
|
|
188
|
+
hallucinationsCheck: true,
|
|
189
|
+
groundingCheck: true,
|
|
190
|
+
});
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
## Invoke Pre-configured Evaluations
|
|
71
194
|
|
|
72
|
-
|
|
195
|
+
Run evaluations configured in the Qualifire dashboard:
|
|
73
196
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
197
|
+
```typescript
|
|
198
|
+
const result = await qualifire.invokeEvaluation({
|
|
199
|
+
input: 'What is the capital of France?',
|
|
200
|
+
output: 'Paris is the capital of France.',
|
|
201
|
+
evaluationId: 'eval-123',
|
|
202
|
+
});
|
|
78
203
|
```
|
|
79
204
|
|
|
80
|
-
|
|
205
|
+
## Configuration
|
|
81
206
|
|
|
207
|
+
### Constructor Options
|
|
208
|
+
|
|
209
|
+
```typescript
|
|
210
|
+
const qualifire = new Qualifire({
|
|
211
|
+
apiKey: 'your-api-key', // Required (or set QUALIFIRE_API_KEY env var)
|
|
212
|
+
baseUrl: 'https://...', // Optional, defaults to https://proxy.qualifire.ai
|
|
213
|
+
});
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
### Environment Variables
|
|
217
|
+
|
|
218
|
+
| Variable | Description |
|
|
219
|
+
|----------|-------------|
|
|
220
|
+
| `QUALIFIRE_API_KEY` | API key for authentication |
|
|
221
|
+
| `QUALIFIRE_BASE_URL` | Override the API base URL |
|
|
222
|
+
|
|
223
|
+
## Response Format
|
|
224
|
+
|
|
225
|
+
```typescript
|
|
226
|
+
interface EvaluationResponse {
|
|
227
|
+
status: 'passed' | 'failed';
|
|
228
|
+
score: number; // 0-100
|
|
229
|
+
evaluationResults: Array<{
|
|
230
|
+
type: string;
|
|
231
|
+
results: Array<{
|
|
232
|
+
name: string;
|
|
233
|
+
score: number;
|
|
234
|
+
label: string;
|
|
235
|
+
confidence_score: number;
|
|
236
|
+
reason: string;
|
|
237
|
+
}>;
|
|
238
|
+
}>;
|
|
239
|
+
}
|
|
240
|
+
```
|
|
82
241
|
|
|
242
|
+
## License
|
|
83
243
|
|
|
244
|
+
MIT
|
|
84
245
|
|
|
85
|
-
[
|
|
86
|
-
[build-url]: https://github.com/qualifire-dev/qualifire-typescript-sdk/actions/workflows/release.yml
|
|
87
|
-
[downloads-img]: https://img.shields.io/npm/dt/main/qualifire
|
|
88
|
-
[npm-url]: https://www.npmjs.com/package/qualifire
|
|
89
|
-
[issues-img]: https://img.shields.io/github/issues/qualifire-dev/develop/qualifire-typescript-sdk
|
|
246
|
+
[issues-img]: https://img.shields.io/github/issues/qualifire-dev/qualifire-typescript-sdk
|
|
90
247
|
[issues-url]: https://github.com/qualifire-dev/qualifire-typescript-sdk/issues
|
|
91
|
-
[codecov-img]: https://codecov.io/gh/qualifire-dev/
|
|
92
|
-
[codecov-url]: https://codecov.io/gh/qualifire-dev/
|
|
248
|
+
[codecov-img]: https://codecov.io/gh/qualifire-dev/qualifire-typescript-sdk/branch/main/graph/badge.svg
|
|
249
|
+
[codecov-url]: https://codecov.io/gh/qualifire-dev/qualifire-typescript-sdk
|
|
93
250
|
[semantic-release-img]: https://img.shields.io/badge/%20%20%F0%9F%93%A6%F0%9F%9A%80-semantic--release-e10079.svg
|
|
94
251
|
[semantic-release-url]: https://github.com/semantic-release/semantic-release
|
|
95
252
|
[commitizen-img]: https://img.shields.io/badge/commitizen-friendly-brightgreen.svg
|
package/lib/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { EvaluationProxyAPIRequest, type EvaluationRequestV2, type EvaluationResponse } from './types';
|
|
2
|
-
export type { EvaluationProxyAPIRequest, EvaluationRequestV2, EvaluationResponse, Framework, LLMMessage, ModelMode, PolicyTarget } from './types';
|
|
2
|
+
export type { EvaluationProxyAPIRequest, EvaluationRequestV2, EvaluationResponse, Framework, LLMMessage, ModelMode, PolicyTarget, } from './types';
|
|
3
3
|
/**
|
|
4
4
|
* Represents the Qualifire SDK.
|
|
5
5
|
*/
|
|
@@ -70,7 +70,7 @@ export declare class Qualifire {
|
|
|
70
70
|
* instructionsFollowingCheck: true,
|
|
71
71
|
* piiCheck: true,
|
|
72
72
|
* promptInjections: true,
|
|
73
|
-
*
|
|
73
|
+
* toolUseQualityCheck: false, // Use this instead of deprecated toolSelectionQualityCheck
|
|
74
74
|
* });
|
|
75
75
|
*
|
|
76
76
|
* // If you are using streaming mode.
|
package/lib/index.js
CHANGED
|
@@ -104,7 +104,7 @@ class Qualifire {
|
|
|
104
104
|
* instructionsFollowingCheck: true,
|
|
105
105
|
* piiCheck: true,
|
|
106
106
|
* promptInjections: true,
|
|
107
|
-
*
|
|
107
|
+
* toolUseQualityCheck: false, // Use this instead of deprecated toolSelectionQualityCheck
|
|
108
108
|
* });
|
|
109
109
|
*
|
|
110
110
|
* // If you are using streaming mode.
|
|
@@ -209,13 +209,22 @@ class Qualifire {
|
|
|
209
209
|
messages: evaluationProxyAPIRequest.messages,
|
|
210
210
|
available_tools: evaluationProxyAPIRequest.available_tools,
|
|
211
211
|
content_moderation_check: contentModerationCheck,
|
|
212
|
-
grounding_check: evaluationProxyAPIRequest.grounding_check ||
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
212
|
+
grounding_check: evaluationProxyAPIRequest.grounding_check ||
|
|
213
|
+
evaluationProxyAPIRequest.groundingCheck,
|
|
214
|
+
hallucinations_check: evaluationProxyAPIRequest.hallucinations_check ||
|
|
215
|
+
evaluationProxyAPIRequest.hallucinationsCheck,
|
|
216
|
+
instructions_following_check: evaluationProxyAPIRequest.instructions_following_check ||
|
|
217
|
+
evaluationProxyAPIRequest.instructionsFollowingCheck,
|
|
218
|
+
pii_check: evaluationProxyAPIRequest.pii_check ||
|
|
219
|
+
evaluationProxyAPIRequest.piiCheck,
|
|
220
|
+
prompt_injections: evaluationProxyAPIRequest.prompt_injections ||
|
|
221
|
+
evaluationProxyAPIRequest.promptInjections,
|
|
222
|
+
syntax_checks: evaluationProxyAPIRequest.syntax_checks ||
|
|
223
|
+
evaluationProxyAPIRequest.syntaxChecks,
|
|
224
|
+
tool_use_quality_check: evaluationProxyAPIRequest.toolUseQualityCheck ||
|
|
225
|
+
evaluationProxyAPIRequest.toolSelectionQualityCheck ||
|
|
226
|
+
evaluationProxyAPIRequest.tool_selection_quality_check,
|
|
227
|
+
tuq_mode: evaluationProxyAPIRequest.tuqMode ?? evaluationProxyAPIRequest.tsqMode,
|
|
219
228
|
assertions: evaluationProxyAPIRequest.assertions,
|
|
220
229
|
};
|
|
221
230
|
const headers = {
|
|
@@ -244,10 +253,10 @@ class Qualifire {
|
|
|
244
253
|
EvaluationRequestV2.hateSpeechCheck ||
|
|
245
254
|
EvaluationRequestV2.sexualContentCheck;
|
|
246
255
|
const frameworkConverters = {
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
256
|
+
openai: () => new openai_converter_1.OpenAICanonicalEvaluationStrategy(),
|
|
257
|
+
vercelai: () => new vercelai_converter_1.VercelAICanonicalEvaluationStrategy(),
|
|
258
|
+
gemini: () => new gemini_converter_1.GeminiAICanonicalEvaluationStrategy(),
|
|
259
|
+
claude: () => new claude_converter_1.ClaudeCanonicalEvaluationStrategy(),
|
|
251
260
|
};
|
|
252
261
|
const supportedFrameworks = Object.keys(frameworkConverters);
|
|
253
262
|
const converterFactory = frameworkConverters[EvaluationRequestV2.framework];
|
|
@@ -267,7 +276,10 @@ class Qualifire {
|
|
|
267
276
|
pii_check: EvaluationRequestV2.piiCheck,
|
|
268
277
|
prompt_injections: EvaluationRequestV2.promptInjections,
|
|
269
278
|
syntax_checks: EvaluationRequestV2.syntaxChecks,
|
|
270
|
-
|
|
279
|
+
tool_use_quality_check: EvaluationRequestV2.toolUseQualityCheck ||
|
|
280
|
+
EvaluationRequestV2.toolSelectionQualityCheck ||
|
|
281
|
+
EvaluationRequestV2.tool_selection_quality_check,
|
|
282
|
+
tuq_mode: EvaluationRequestV2.tuqMode ?? EvaluationRequestV2.tsqMode,
|
|
271
283
|
assertions: EvaluationRequestV2.assertions,
|
|
272
284
|
};
|
|
273
285
|
const headers = {
|
package/lib/types.d.ts
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
declare const FrameworkEnum: readonly ["openai", "vercelai", "gemini", "claude"];
|
|
3
|
-
export type Framework = typeof FrameworkEnum[number];
|
|
3
|
+
export type Framework = (typeof FrameworkEnum)[number];
|
|
4
4
|
declare const ModelModeEnum: readonly ["speed", "balanced", "quality"];
|
|
5
|
-
export type ModelMode = typeof ModelModeEnum[number];
|
|
5
|
+
export type ModelMode = (typeof ModelModeEnum)[number];
|
|
6
6
|
declare const PolicyTargetEnum: readonly ["input", "output", "both"];
|
|
7
|
-
export type PolicyTarget = typeof PolicyTargetEnum[number];
|
|
7
|
+
export type PolicyTarget = (typeof PolicyTargetEnum)[number];
|
|
8
8
|
export declare const messageSchema: z.ZodObject<{
|
|
9
9
|
role: z.ZodString;
|
|
10
10
|
content: z.ZodNullable<z.ZodString>;
|
|
@@ -105,6 +105,7 @@ export declare const EvaluationRequestV2Schema: z.ZodObject<{
|
|
|
105
105
|
args: z.ZodString;
|
|
106
106
|
}, z.core.$strip>>>;
|
|
107
107
|
toolSelectionQualityCheck: z.ZodOptional<z.ZodDefault<z.ZodBoolean>>;
|
|
108
|
+
toolUseQualityCheck: z.ZodOptional<z.ZodDefault<z.ZodBoolean>>;
|
|
108
109
|
assertions: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
109
110
|
available_tools: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
110
111
|
name: z.ZodString;
|
|
@@ -124,11 +125,16 @@ export declare const EvaluationRequestV2Schema: z.ZodObject<{
|
|
|
124
125
|
args: z.ZodString;
|
|
125
126
|
}, z.core.$strip>>>;
|
|
126
127
|
tool_selection_quality_check: z.ZodOptional<z.ZodDefault<z.ZodBoolean>>;
|
|
127
|
-
tsqMode: z.ZodOptional<z.
|
|
128
|
+
tsqMode: z.ZodOptional<z.ZodEnum<{
|
|
128
129
|
speed: "speed";
|
|
129
130
|
balanced: "balanced";
|
|
130
131
|
quality: "quality";
|
|
131
|
-
}
|
|
132
|
+
}>>;
|
|
133
|
+
tuqMode: z.ZodOptional<z.ZodEnum<{
|
|
134
|
+
speed: "speed";
|
|
135
|
+
balanced: "balanced";
|
|
136
|
+
quality: "quality";
|
|
137
|
+
}>>;
|
|
132
138
|
consistencyMode: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
|
|
133
139
|
speed: "speed";
|
|
134
140
|
balanced: "balanced";
|
|
@@ -202,11 +208,17 @@ export declare const EvaluationProxyAPIRequestSchema: z.ZodObject<{
|
|
|
202
208
|
args: z.ZodString;
|
|
203
209
|
}, z.core.$strip>>>;
|
|
204
210
|
toolSelectionQualityCheck: z.ZodOptional<z.ZodDefault<z.ZodBoolean>>;
|
|
205
|
-
|
|
211
|
+
toolUseQualityCheck: z.ZodOptional<z.ZodDefault<z.ZodBoolean>>;
|
|
212
|
+
tsqMode: z.ZodOptional<z.ZodEnum<{
|
|
206
213
|
speed: "speed";
|
|
207
214
|
balanced: "balanced";
|
|
208
215
|
quality: "quality";
|
|
209
|
-
}
|
|
216
|
+
}>>;
|
|
217
|
+
tuqMode: z.ZodOptional<z.ZodEnum<{
|
|
218
|
+
speed: "speed";
|
|
219
|
+
balanced: "balanced";
|
|
220
|
+
quality: "quality";
|
|
221
|
+
}>>;
|
|
210
222
|
consistencyMode: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
|
|
211
223
|
speed: "speed";
|
|
212
224
|
balanced: "balanced";
|
package/lib/types.js
CHANGED
|
@@ -78,7 +78,9 @@ exports.EvaluationRequestV2Schema = zod_1.z.object({
|
|
|
78
78
|
sexualContentCheck: zod_1.z.boolean().default(false).optional(),
|
|
79
79
|
contentModerationCheck: zod_1.z.boolean().default(false).optional(),
|
|
80
80
|
syntaxChecks: zod_1.z.record(zod_1.z.string(), SyntaxCheckArgsSchema).optional(),
|
|
81
|
+
/** @deprecated Use toolUseQualityCheck instead */
|
|
81
82
|
toolSelectionQualityCheck: zod_1.z.boolean().default(false).optional(),
|
|
83
|
+
toolUseQualityCheck: zod_1.z.boolean().default(false).optional(),
|
|
82
84
|
assertions: zod_1.z.array(zod_1.z.string()).optional(),
|
|
83
85
|
/** @deprecated Automatically added from the request*/
|
|
84
86
|
available_tools: zod_1.z.array(exports.LLMToolDefinitionSchema).optional(),
|
|
@@ -102,9 +104,11 @@ exports.EvaluationRequestV2Schema = zod_1.z.object({
|
|
|
102
104
|
sexual_content_check: zod_1.z.boolean().default(false).optional(),
|
|
103
105
|
/** @deprecated Use syntaxChecks instead */
|
|
104
106
|
syntax_checks: zod_1.z.record(zod_1.z.string(), SyntaxCheckArgsSchema).optional(),
|
|
105
|
-
/** @deprecated Use
|
|
107
|
+
/** @deprecated Use toolUseQualityCheck instead */
|
|
106
108
|
tool_selection_quality_check: zod_1.z.boolean().default(false).optional(),
|
|
107
|
-
|
|
109
|
+
/** @deprecated Use tuqMode instead */
|
|
110
|
+
tsqMode: zod_1.z.enum(ModelModeEnum).optional(),
|
|
111
|
+
tuqMode: zod_1.z.enum(ModelModeEnum).optional(),
|
|
108
112
|
consistencyMode: zod_1.z.enum(ModelModeEnum).default('balanced').optional(),
|
|
109
113
|
assertionsMode: zod_1.z.enum(ModelModeEnum).default('balanced').optional(),
|
|
110
114
|
groundingMode: zod_1.z.enum(ModelModeEnum).default('balanced').optional(),
|
|
@@ -161,8 +165,12 @@ exports.EvaluationProxyAPIRequestSchema = zod_1.z
|
|
|
161
165
|
sexualContentCheck: zod_1.z.boolean().default(false).optional(),
|
|
162
166
|
contentModerationCheck: zod_1.z.boolean().default(false).optional(),
|
|
163
167
|
syntaxChecks: zod_1.z.record(zod_1.z.string(), SyntaxCheckArgsSchema).optional(),
|
|
168
|
+
/** @deprecated Use toolUseQualityCheck instead */
|
|
164
169
|
toolSelectionQualityCheck: zod_1.z.boolean().default(false).optional(),
|
|
165
|
-
|
|
170
|
+
toolUseQualityCheck: zod_1.z.boolean().default(false).optional(),
|
|
171
|
+
/** @deprecated Use tuqMode instead */
|
|
172
|
+
tsqMode: zod_1.z.enum(ModelModeEnum).optional(),
|
|
173
|
+
tuqMode: zod_1.z.enum(ModelModeEnum).optional(),
|
|
166
174
|
consistencyMode: zod_1.z.enum(ModelModeEnum).default('balanced').optional(),
|
|
167
175
|
assertionsMode: zod_1.z.enum(ModelModeEnum).default('balanced').optional(),
|
|
168
176
|
groundingMode: zod_1.z.enum(ModelModeEnum).default('balanced').optional(),
|
|
@@ -183,20 +191,22 @@ exports.EvaluationProxyAPIRequestSchema = zod_1.z
|
|
|
183
191
|
path: [], // Top level
|
|
184
192
|
});
|
|
185
193
|
}
|
|
186
|
-
// Validation: tool_selection_quality_check requires messages and available_tools
|
|
187
|
-
if (data.tool_selection_quality_check
|
|
194
|
+
// Validation: tool_selection_quality_check or toolSelectionQualityCheck or toolUseQualityCheck requires messages and available_tools
|
|
195
|
+
if (data.tool_selection_quality_check ||
|
|
196
|
+
data.toolSelectionQualityCheck ||
|
|
197
|
+
data.toolUseQualityCheck) {
|
|
188
198
|
const hasAvailableTools = Array.isArray(data.available_tools) && data.available_tools.length > 0;
|
|
189
199
|
if (!hasMessages) {
|
|
190
200
|
ctx.addIssue({
|
|
191
201
|
code: zod_1.z.ZodIssueCode.custom,
|
|
192
|
-
message: 'messages must be provided when
|
|
202
|
+
message: 'messages must be provided when tool quality check is enabled',
|
|
193
203
|
path: ['messages'],
|
|
194
204
|
});
|
|
195
205
|
}
|
|
196
206
|
if (!hasAvailableTools) {
|
|
197
207
|
ctx.addIssue({
|
|
198
208
|
code: zod_1.z.ZodIssueCode.custom,
|
|
199
|
-
message: 'available_tools must be provided when
|
|
209
|
+
message: 'available_tools must be provided when tool quality check is enabled',
|
|
200
210
|
path: ['available_tools'],
|
|
201
211
|
});
|
|
202
212
|
}
|