qualifire 1.2.2 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +206 -49
- package/lib/frameworks/canonical.d.ts +4 -0
- package/lib/frameworks/canonical.js +2 -0
- package/lib/frameworks/claude/claude-converter.d.ts +15 -0
- package/lib/frameworks/claude/claude-converter.js +229 -0
- package/lib/frameworks/gemini/gemini-converter.d.ts +11 -0
- package/lib/frameworks/gemini/gemini-converter.js +241 -0
- package/lib/frameworks/openai/openai-converter.d.ts +28 -0
- package/lib/frameworks/openai/openai-converter.js +522 -0
- package/lib/frameworks/vercelai/vercelai-converter.d.ts +13 -0
- package/lib/frameworks/vercelai/vercelai-converter.js +258 -0
- package/lib/index.d.ts +124 -16
- package/lib/index.js +224 -18
- package/lib/types.d.ts +194 -367
- package/lib/types.js +125 -10
- package/package.json +20 -18
package/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
Qualifire
|
|
1
|
+
# Qualifire SDK
|
|
2
2
|
|
|
3
3
|
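[![CodeQL](https://github.com/qualifire-dev/qualifire-typescript-sdk/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/qualifire-dev/qualifire-typescript-sdk/actions/workflows/codeql-analysis.yml)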
|
|
4
4
|
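[![Release](https://github.com/qualifire-dev/qualifire-typescript-sdk/actions/workflows/release.yml/badge.svg)](https://github.com/qualifire-dev/qualifire-typescript-sdk/actions/workflows/release.yml)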
|
|
@@ -7,9 +7,7 @@ Qualifire
|
|
|
7
7
|
[![Commitizen Friendly][commitizen-img]][commitizen-url]
|
|
8
8
|
[![Semantic Release][semantic-release-img]][semantic-release-url]
|
|
9
9
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
This is the official SDK for interacting with the Qualifire API.
|
|
10
|
+
The official TypeScript SDK for evaluating LLM outputs with [Qualifire](https://qualifire.ai). Detect hallucinations, prompt injections, PII leakage, content policy violations, and more.
|
|
13
11
|
|
|
14
12
|
## Installation
|
|
15
13
|
|
|
@@ -17,79 +15,238 @@ This is the official SDK for interacting with the Qualifire API.
|
|
|
17
15
|
npm install qualifire
|
|
18
16
|
```
|
|
19
17
|
|
|
20
|
-
##
|
|
18
|
+
## Quick Start
|
|
19
|
+
|
|
20
|
+
```typescript
|
|
21
|
+
import { Qualifire } from 'qualifire';
|
|
22
|
+
import OpenAI from 'openai';
|
|
23
|
+
|
|
24
|
+
const qualifire = new Qualifire({ apiKey: 'your-api-key' });
|
|
25
|
+
const openai = new OpenAI();
|
|
26
|
+
|
|
27
|
+
// Make your LLM call
|
|
28
|
+
const request = {
|
|
29
|
+
model: 'gpt-4o',
|
|
30
|
+
messages: [
|
|
31
|
+
{ role: 'system', content: 'You are a helpful assistant.' },
|
|
32
|
+
{ role: 'user', content: 'What is the capital of France?' },
|
|
33
|
+
],
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
const response = await openai.chat.completions.create(request);
|
|
21
37
|
|
|
22
|
-
|
|
38
|
+
// Evaluate the response
|
|
39
|
+
const evaluation = await qualifire.evaluate({
|
|
40
|
+
framework: 'openai',
|
|
41
|
+
request,
|
|
42
|
+
response,
|
|
43
|
+
hallucinationsCheck: true,
|
|
44
|
+
groundingCheck: true,
|
|
45
|
+
});
|
|
23
46
|
|
|
24
|
-
|
|
25
|
-
|
|
47
|
+
console.log(evaluation);
|
|
48
|
+
// {
|
|
49
|
+
// status: 'passed',
|
|
50
|
+
// score: 100,
|
|
51
|
+
// evaluationResults: [...]
|
|
52
|
+
// }
|
|
26
53
|
```
|
|
27
54
|
|
|
28
|
-
|
|
55
|
+
## Supported Frameworks
|
|
29
56
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
57
|
+
| Framework | Value | SDK |
|
|
58
|
+
|-----------|-------|-----|
|
|
59
|
+
| OpenAI | `openai` | `openai` (Chat Completions & Responses API) |
|
|
60
|
+
| Anthropic Claude | `claude` | `@anthropic-ai/sdk` |
|
|
61
|
+
| Google Gemini | `gemini` | `@google/genai` |
|
|
62
|
+
| Vercel AI SDK | `vercelai` | `ai` |
|
|
63
|
+
|
|
64
|
+
All frameworks support both streaming and non-streaming responses.
|
|
65
|
+
|
|
66
|
+
## Available Evaluation Checks
|
|
67
|
+
|
|
68
|
+
| Check | Parameter | Description |
|
|
69
|
+
|-------|-----------|-------------|
|
|
70
|
+
| Hallucinations | `hallucinationsCheck` | Detect fabricated information |
|
|
71
|
+
| Grounding | `groundingCheck` | Verify responses are grounded in context |
|
|
72
|
+
| Prompt Injections | `promptInjections` | Detect prompt injection attempts |
|
|
73
|
+
| PII Detection | `piiCheck` | Identify personally identifiable information |
|
|
74
|
+
| Content Moderation | `contentModerationCheck` | Flag harmful content |
|
|
75
|
+
| Instructions Following | `instructionsFollowingCheck` | Verify adherence to system instructions |
|
|
76
|
+
| Tool Selection Quality | `toolSelectionQualityCheck` | Evaluate tool/function call accuracy |
|
|
77
|
+
| Custom Assertions | `assertions` | Array of custom assertion strings |
|
|
78
|
+
|
|
79
|
+
## Framework Examples
|
|
80
|
+
|
|
81
|
+
### OpenAI
|
|
82
|
+
|
|
83
|
+
```typescript
|
|
84
|
+
// Chat Completions API
|
|
85
|
+
const request = {
|
|
86
|
+
model: 'gpt-4o',
|
|
87
|
+
messages: [{ role: 'user', content: 'Hello!' }],
|
|
88
|
+
};
|
|
89
|
+
const response = await openai.chat.completions.create(request);
|
|
90
|
+
|
|
91
|
+
await qualifire.evaluate({
|
|
92
|
+
framework: 'openai',
|
|
93
|
+
request,
|
|
94
|
+
response,
|
|
95
|
+
hallucinationsCheck: true,
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
// Streaming
|
|
99
|
+
const streamRequest = { ...request, stream: true };
|
|
100
|
+
const stream = await openai.chat.completions.create(streamRequest);
|
|
101
|
+
|
|
102
|
+
const chunks = [];
|
|
103
|
+
for await (const chunk of stream) {
|
|
104
|
+
chunks.push(chunk);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
await qualifire.evaluate({
|
|
108
|
+
framework: 'openai',
|
|
109
|
+
request: streamRequest,
|
|
110
|
+
response: chunks,
|
|
111
|
+
hallucinationsCheck: true,
|
|
33
112
|
});
|
|
34
113
|
```
|
|
35
114
|
|
|
36
|
-
|
|
115
|
+
### Anthropic Claude
|
|
37
116
|
|
|
38
|
-
|
|
117
|
+
```typescript
|
|
118
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
39
119
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
content: 'this is my awesome request',
|
|
47
|
-
},
|
|
48
|
-
],
|
|
120
|
+
const anthropic = new Anthropic();
|
|
121
|
+
|
|
122
|
+
const request = {
|
|
123
|
+
model: 'claude-sonnet-4-20250514',
|
|
124
|
+
max_tokens: 1024,
|
|
125
|
+
messages: [{ role: 'user', content: 'Hello!' }],
|
|
49
126
|
};
|
|
127
|
+
const response = await anthropic.messages.create(request);
|
|
50
128
|
|
|
51
|
-
|
|
129
|
+
await qualifire.evaluate({
|
|
130
|
+
framework: 'claude',
|
|
131
|
+
request,
|
|
132
|
+
response,
|
|
133
|
+
promptInjections: true,
|
|
134
|
+
});
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Google Gemini
|
|
138
|
+
|
|
139
|
+
```typescript
|
|
140
|
+
import { GoogleGenAI } from '@google/genai';
|
|
141
|
+
|
|
142
|
+
const genai = new GoogleGenAI({ apiKey: 'your-key' });
|
|
143
|
+
|
|
144
|
+
const request = {
|
|
145
|
+
model: 'gemini-2.0-flash',
|
|
146
|
+
contents: [{ role: 'user', parts: [{ text: 'Hello!' }] }],
|
|
147
|
+
};
|
|
148
|
+
const response = await genai.models.generateContent(request);
|
|
149
|
+
|
|
150
|
+
await qualifire.evaluate({
|
|
151
|
+
framework: 'gemini',
|
|
152
|
+
request,
|
|
153
|
+
response,
|
|
154
|
+
contentModerationCheck: true,
|
|
155
|
+
});
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### Vercel AI SDK
|
|
159
|
+
|
|
160
|
+
```typescript
|
|
161
|
+
import { generateText } from 'ai';
|
|
162
|
+
import { openai } from '@ai-sdk/openai';
|
|
163
|
+
|
|
164
|
+
const request = {
|
|
165
|
+
model: openai('gpt-4o'),
|
|
166
|
+
prompt: 'Hello!',
|
|
167
|
+
};
|
|
168
|
+
const response = await generateText(request);
|
|
52
169
|
|
|
53
|
-
|
|
54
|
-
|
|
170
|
+
await qualifire.evaluate({
|
|
171
|
+
framework: 'vercelai',
|
|
172
|
+
request,
|
|
173
|
+
response,
|
|
174
|
+
piiCheck: true,
|
|
175
|
+
});
|
|
55
176
|
```
|
|
56
177
|
|
|
57
|
-
|
|
178
|
+
## Direct Message Mode
|
|
58
179
|
|
|
59
|
-
|
|
180
|
+
For cases where you don't use a supported framework, pass messages directly:
|
|
60
181
|
|
|
61
|
-
```
|
|
62
|
-
|
|
63
|
-
model: 'gpt-3.5-turbo',
|
|
182
|
+
```typescript
|
|
183
|
+
await qualifire.evaluate({
|
|
64
184
|
messages: [
|
|
65
|
-
{
|
|
66
|
-
|
|
67
|
-
content: 'this is my awesome request',
|
|
68
|
-
},
|
|
185
|
+
{ role: 'user', content: 'What is 2+2?' },
|
|
186
|
+
{ role: 'assistant', content: 'The answer is 4.' },
|
|
69
187
|
],
|
|
70
|
-
|
|
188
|
+
hallucinationsCheck: true,
|
|
189
|
+
groundingCheck: true,
|
|
190
|
+
});
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
## Invoke Pre-configured Evaluations
|
|
71
194
|
|
|
72
|
-
|
|
195
|
+
Run evaluations configured in the Qualifire dashboard:
|
|
73
196
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
197
|
+
```typescript
|
|
198
|
+
const result = await qualifire.invokeEvaluation({
|
|
199
|
+
input: 'What is the capital of France?',
|
|
200
|
+
output: 'Paris is the capital of France.',
|
|
201
|
+
evaluationId: 'eval-123',
|
|
202
|
+
});
|
|
78
203
|
```
|
|
79
204
|
|
|
80
|
-
|
|
205
|
+
## Configuration
|
|
81
206
|
|
|
207
|
+
### Constructor Options
|
|
208
|
+
|
|
209
|
+
```typescript
|
|
210
|
+
const qualifire = new Qualifire({
|
|
211
|
+
apiKey: 'your-api-key', // Required (or set QUALIFIRE_API_KEY env var)
|
|
212
|
+
baseUrl: 'https://...', // Optional, defaults to https://proxy.qualifire.ai
|
|
213
|
+
});
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
### Environment Variables
|
|
217
|
+
|
|
218
|
+
| Variable | Description |
|
|
219
|
+
|----------|-------------|
|
|
220
|
+
| `QUALIFIRE_API_KEY` | API key for authentication |
|
|
221
|
+
| `QUALIFIRE_BASE_URL` | Override the API base URL |
|
|
222
|
+
|
|
223
|
+
## Response Format
|
|
224
|
+
|
|
225
|
+
```typescript
|
|
226
|
+
interface EvaluationResponse {
|
|
227
|
+
status: 'passed' | 'failed';
|
|
228
|
+
score: number; // 0-100
|
|
229
|
+
evaluationResults: Array<{
|
|
230
|
+
type: string;
|
|
231
|
+
results: Array<{
|
|
232
|
+
name: string;
|
|
233
|
+
score: number;
|
|
234
|
+
label: string;
|
|
235
|
+
confidence_score: number;
|
|
236
|
+
reason: string;
|
|
237
|
+
}>;
|
|
238
|
+
}>;
|
|
239
|
+
}
|
|
240
|
+
```
|
|
82
241
|
|
|
242
|
+
## License
|
|
83
243
|
|
|
244
|
+
MIT
|
|
84
245
|
|
|
85
|
-
[
|
|
86
|
-
[build-url]: https://github.com/qualifire-dev/qualifire-typescript-sdk/actions/workflows/release.yml
|
|
87
|
-
[downloads-img]: https://img.shields.io/npm/dt/main/qualifire
|
|
88
|
-
[npm-url]: https://www.npmjs.com/package/qualifire
|
|
89
|
-
[issues-img]: https://img.shields.io/github/issues/qualifire-dev/develop/qualifire-typescript-sdk
|
|
246
|
+
[issues-img]: https://img.shields.io/github/issues/qualifire-dev/qualifire-typescript-sdk
|
|
90
247
|
[issues-url]: https://github.com/qualifire-dev/qualifire-typescript-sdk/issues
|
|
91
|
-
[codecov-img]: https://codecov.io/gh/qualifire-dev/
|
|
92
|
-
[codecov-url]: https://codecov.io/gh/qualifire-dev/
|
|
248
|
+
[codecov-img]: https://codecov.io/gh/qualifire-dev/qualifire-typescript-sdk/branch/main/graph/badge.svg
|
|
249
|
+
[codecov-url]: https://codecov.io/gh/qualifire-dev/qualifire-typescript-sdk
|
|
93
250
|
[semantic-release-img]: https://img.shields.io/badge/%20%20%F0%9F%93%A6%F0%9F%9A%80-semantic--release-e10079.svg
|
|
94
251
|
[semantic-release-url]: https://github.com/semantic-release/semantic-release
|
|
95
252
|
[commitizen-img]: https://img.shields.io/badge/commitizen-friendly-brightgreen.svg
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { Message, type MessageCreateParams, type MessageStreamParams } from '@anthropic-ai/sdk/resources';
|
|
2
|
+
import { RawMessageStreamEvent } from '@anthropic-ai/sdk/resources/messages';
|
|
3
|
+
import { EvaluationProxyAPIRequest } from '../../types';
|
|
4
|
+
import { CanonicalEvaluationStrategy } from '../canonical';
|
|
5
|
+
type AnthropicCreateAPIResponsesType = Message | RawMessageStreamEvent;
|
|
6
|
+
type AnthropicAPIRequestsType = MessageCreateParams;
|
|
7
|
+
type AnthropicAPIResponsesType = AnthropicCreateAPIResponsesType | MessageStreamParams;
|
|
8
|
+
/**
 * Type declaration for the Anthropic (Claude) implementation of
 * `CanonicalEvaluationStrategy`: it maps an Anthropic request/response pair
 * onto Qualifire's `EvaluationProxyAPIRequest`.
 */
export declare class ClaudeCanonicalEvaluationStrategy implements CanonicalEvaluationStrategy<AnthropicAPIRequestsType, AnthropicAPIResponsesType> {
|
|
9
|
+
/**
 * Converts a request together with its response.
 *
 * @param request - The Anthropic request parameters.
 * @param response - The Anthropic response that belongs to `request`.
 * @returns A promise resolving to the evaluation request payload.
 */
convertToQualifireEvaluationRequest(request: AnthropicAPIRequestsType, response: AnthropicAPIResponsesType): Promise<EvaluationProxyAPIRequest>;
|
|
10
|
+
/**
 * Converts only the request part. Unlike the method above this one is
 * synchronous and accepts an untyped (`any`) request.
 */
convertRequest(request: any): EvaluationProxyAPIRequest;
|
|
11
|
+
// Private members are declared without signatures in emitted declaration files.
private handleStreaming;
|
|
12
|
+
private handleNonStreamingResponse;
|
|
13
|
+
private convertClaudeMessagesToLLMMessages;
|
|
14
|
+
}
|
|
15
|
+
export {};
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ClaudeCanonicalEvaluationStrategy = void 0;
|
|
4
|
+
/**
 * Flattens the `system` field of an Anthropic request into plain text.
 *
 * Anthropic accepts either a string or an array of `{ type: 'text', text }`
 * blocks. Array blocks are concatenated in order without a separator, which
 * mirrors how message content parts are joined elsewhere in this class.
 * Any other value is returned unchanged so unexpected shapes are not lost.
 */
function claudeSystemPromptToText(system) {
    if (typeof system === 'string') {
        return system;
    }
    if (Array.isArray(system)) {
        return system
            .filter(block => block && block.type === 'text' && typeof block.text === 'string')
            .map(block => block.text)
            .join('');
    }
    return system;
}
/**
 * Builds the arguments object of a streamed tool call.
 *
 * @param partialJsonParts - `partial_json` fragments in arrival order.
 * @param initialInput - `input` carried by the `content_block_start` event.
 * @returns The parsed arguments; when no JSON text was streamed (a tool
 *   called without arguments) falls back to `initialInput` or `{}` instead
 *   of handing an empty string to `JSON.parse`.
 * @throws SyntaxError when the streamed text is non-empty but not valid JSON.
 */
function parseClaudeToolInput(partialJsonParts, initialInput) {
    const rawJson = partialJsonParts.join('');
    if (rawJson.trim() === '') {
        return initialInput ?? {};
    }
    return JSON.parse(rawJson);
}
/**
 * Converts Anthropic (Claude) Messages API requests and responses into the
 * canonical Qualifire evaluation payload (`{ messages, available_tools }`).
 */
class ClaudeCanonicalEvaluationStrategy {
    /**
     * Converts a request and, when present, its response.
     *
     * @param request - Anthropic `messages.create` parameters.
     * @param response - Either a single message object (non-streaming) or an
     *   array of collected stream events (streaming). A falsy value yields
     *   the converted request only.
     * @returns The evaluation payload with the response messages appended
     *   after the request messages.
     */
    async convertToQualifireEvaluationRequest(request, response) {
        const converted = this.convertRequest(request);
        const messages = converted.messages || [];
        const availableTools = converted.available_tools || [];
        if (response) {
            // Collected stream events arrive as an array; anything else is
            // treated as a complete message object.
            const responseMessages = Array.isArray(response)
                ? await this.handleStreaming(response)
                : await this.handleNonStreamingResponse(response);
            messages.push(...responseMessages);
        }
        return {
            messages,
            available_tools: availableTools,
        };
    }
    /**
     * Converts the request part only: system prompt first, then the
     * conversation messages, plus the declared tools.
     *
     * @param request - Anthropic request; may be null/undefined.
     * @returns `{ messages, available_tools }`, both possibly empty.
     */
    convertRequest(request) {
        const messages = [];
        const availableTools = [];
        if (request?.system) {
            const systemText = claudeSystemPromptToText(request.system);
            if (systemText) {
                messages.push({
                    role: 'system',
                    content: systemText,
                });
            }
        }
        if (request?.messages) {
            messages.push(...this.convertClaudeMessagesToLLMMessages(request.messages));
        }
        if (request?.tools) {
            for (const tool of request.tools) {
                availableTools.push({
                    name: tool.name,
                    description: tool.description,
                    // Tools without an input schema are reported with no parameters.
                    parameters: tool.input_schema?.properties || {},
                });
            }
        }
        return {
            messages,
            available_tools: availableTools,
        };
    }
    /**
     * Folds an array of raw stream events into messages.
     *
     * One message is emitted per `message_start` … `message_stop` pair. Text
     * deltas are concatenated and trimmed; every `tool_use` block becomes its
     * own entry in `tool_calls`. Thinking blocks and their deltas are ignored,
     * matching the non-streaming conversion, which does not emit them either.
     *
     * @param response - Stream events in arrival order.
     * @returns The assembled messages; an unterminated trailing message
     *   (no `message_stop`) is not emitted.
     * @throws SyntaxError when a tool call streamed malformed JSON input.
     */
    async handleStreaming(response) {
        const messages = [];
        let role;
        let textParts = [];
        let toolCalls = [];
        let pendingTool;
        // Moves the tool call currently being streamed into `toolCalls`.
        const flushPendingTool = () => {
            if (!pendingTool) {
                return;
            }
            toolCalls.push({
                id: pendingTool.id,
                name: pendingTool.name,
                arguments: parseClaudeToolInput(pendingTool.inputParts, pendingTool.initialInput),
            });
            pendingTool = undefined;
        };
        const resetAccumulators = () => {
            textParts = [];
            toolCalls = [];
            pendingTool = undefined;
        };
        for (const responseEvent of response) {
            switch (responseEvent.type) {
                case 'message_start':
                    role = responseEvent.message.role;
                    resetAccumulators();
                    break;
                case 'content_block_start': {
                    const block = responseEvent.content_block;
                    switch (block.type) {
                        case 'text':
                            textParts.push(block.text);
                            break;
                        case 'tool_use':
                            // Close a previous tool block whose stop event was missing
                            // so that it is not overwritten by this one.
                            flushPendingTool();
                            pendingTool = {
                                id: block.id,
                                name: block.name,
                                initialInput: block.input,
                                inputParts: [],
                            };
                            break;
                        case 'thinking':
                        case 'redacted_thinking':
                            // Reasoning is not part of the evaluated answer.
                            break;
                        default:
                            console.debug(`Invalid content block type: ${block.type}`);
                    }
                    break;
                }
                case 'content_block_delta': {
                    const delta = responseEvent.delta;
                    switch (delta.type) {
                        case 'text_delta':
                            textParts.push(delta.text);
                            break;
                        case 'input_json_delta':
                            if (pendingTool) {
                                pendingTool.inputParts.push(delta.partial_json);
                            }
                            break;
                        case 'thinking_delta':
                        case 'signature_delta':
                            break;
                        default:
                            console.debug(`Invalid delta type: ${delta.type}`);
                    }
                    break;
                }
                case 'content_block_stop':
                    flushPendingTool();
                    break;
                case 'message_stop': {
                    flushPendingTool();
                    if (!role) {
                        console.debug('role was not set');
                        continue;
                    }
                    messages.push({
                        role: role === 'model' ? 'assistant' : role,
                        content: textParts.length > 0 ? textParts.join('').trim() : undefined,
                        tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
                    });
                    role = undefined;
                    resetAccumulators();
                    break;
                }
                case 'message_delta':
                    break;
                default:
                    console.debug(`Invalid event: ${responseEvent.type}`);
            }
        }
        return messages;
    }
    /**
     * Converts a complete (non-streaming) response message.
     *
     * @param response - The message returned by `anthropic.messages.create()`.
     * @returns The converted messages.
     * @throws Error when the response role is not `'assistant'`.
     */
    async handleNonStreamingResponse(response) {
        if (response.role !== 'assistant') {
            throw new Error("Response role must be 'assistant'. Make sure to use response " +
                'from anthropic.messages.create() when not using streaming.');
        }
        return [...this.convertClaudeMessagesToLLMMessages([response])];
    }
    /**
     * Converts Claude messages (request or response side) to LLM messages.
     *
     * String content is passed through. Array content is flattened: text
     * parts are concatenated, `tool_use` parts become `tool_calls`, and
     * `tool_result` parts contribute their text and switch the role to
     * `'tool'`. Messages that yield neither text nor tool calls are skipped.
     *
     * @param messages - Claude messages in conversation order.
     * @returns The converted messages.
     */
    convertClaudeMessagesToLLMMessages(messages) {
        const extractedMessages = [];
        for (const message of messages) {
            if (typeof message.content === 'string') {
                extractedMessages.push({
                    role: message.role,
                    content: message.content,
                });
                continue;
            }
            // Missing or non-iterable content cannot be flattened.
            if (!Array.isArray(message.content)) {
                continue;
            }
            const textParts = [];
            const toolCalls = [];
            let role = message.role;
            for (const part of message.content) {
                switch (part.type) {
                    case 'tool_use':
                        toolCalls.push({
                            name: part.name,
                            arguments: part.input,
                            id: part.id,
                        });
                        break;
                    case 'tool_result':
                        // Claude sends tool results with the 'user' role; Qualifire
                        // expects them to come from the 'tool' role.
                        role = 'tool';
                        if (typeof part.content === 'string') {
                            textParts.push(part.content);
                        }
                        else if (Array.isArray(part.content)) {
                            // `content` is optional on tool results, hence the guard.
                            for (const inner of part.content) {
                                if (inner.type === 'text') {
                                    textParts.push(inner.text);
                                }
                            }
                        }
                        break;
                    case 'text':
                        textParts.push(part.text);
                        break;
                    default:
                        console.debug('Invalid Claude output: message - ' +
                            JSON.stringify(message) +
                            ' part - ' +
                            JSON.stringify(part));
                }
            }
            if (textParts.length === 0 && toolCalls.length === 0) {
                continue;
            }
            extractedMessages.push({
                role,
                ...(textParts.length > 0 ? { content: textParts.join('') } : {}),
                ...(toolCalls.length > 0 ? { tool_calls: toolCalls } : {}),
            });
        }
        return extractedMessages;
    }
}
|
|
229
|
+
exports.ClaudeCanonicalEvaluationStrategy = ClaudeCanonicalEvaluationStrategy;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { EvaluationProxyAPIRequest } from '../../types';
|
|
2
|
+
import { CanonicalEvaluationStrategy } from '../canonical';
|
|
3
|
+
type GeminiAICanonicalEvaluationStrategyResponse = any;
|
|
4
|
+
type GeminiAICanonicalEvaluationStrategyRequest = any;
|
|
5
|
+
/**
 * Type declaration for the Gemini implementation of
 * `CanonicalEvaluationStrategy`. Both type arguments are the local request
 * and response aliases of this declaration file.
 */
export declare class GeminiAICanonicalEvaluationStrategy implements CanonicalEvaluationStrategy<GeminiAICanonicalEvaluationStrategyRequest, GeminiAICanonicalEvaluationStrategyResponse> {
|
|
6
|
+
/**
 * Converts a request together with its response.
 *
 * @param request - The Gemini request.
 * @param response - The Gemini response that belongs to `request`.
 * @returns A promise resolving to the evaluation request payload.
 */
convertToQualifireEvaluationRequest(request: GeminiAICanonicalEvaluationStrategyRequest, response: GeminiAICanonicalEvaluationStrategyResponse): Promise<EvaluationProxyAPIRequest>;
|
|
7
|
+
/** Converts only the request part; declared as asynchronous (returns a promise). */
convertRequest(request: GeminiAICanonicalEvaluationStrategyRequest): Promise<EvaluationProxyAPIRequest>;
|
|
8
|
+
// Private members are declared without signatures in emitted declaration files.
private handleNonStreamingResponse;
|
|
9
|
+
private handleStreaming;
|
|
10
|
+
}
|
|
11
|
+
export {};
|