qualifire 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
+ import { EvaluationProxyAPIRequest } from '../types';
+ export interface CanonicalEvaluationStrategy<RequestType, ResponseType> {
+ convertToQualifireEvaluationRequest(request: RequestType, response: ResponseType): Promise<EvaluationProxyAPIRequest>;
+ }
@@ -0,0 +1,2 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,15 @@
+ import { Message, type MessageCreateParams, type MessageStreamParams } from '@anthropic-ai/sdk/resources';
+ import { RawMessageStreamEvent } from '@anthropic-ai/sdk/resources/messages';
+ import { EvaluationProxyAPIRequest } from '../../types';
+ import { CanonicalEvaluationStrategy } from '../canonical';
+ type AnthropicCreateAPIResponsesType = Message | RawMessageStreamEvent;
+ type AnthropicAPIRequestsType = MessageCreateParams;
+ type AnthropicAPIResponsesType = AnthropicCreateAPIResponsesType | MessageStreamParams;
+ export declare class ClaudeCanonicalEvaluationStrategy implements CanonicalEvaluationStrategy<AnthropicAPIRequestsType, AnthropicAPIResponsesType> {
+ convertToQualifireEvaluationRequest(request: AnthropicAPIRequestsType, response: AnthropicAPIResponsesType): Promise<EvaluationProxyAPIRequest>;
+ convertRequest(request: any): EvaluationProxyAPIRequest;
+ private handleStreaming;
+ private handleNonStreamingResponse;
+ private convertClaudeMessagesToLLMMessages;
+ }
+ export {};
@@ -0,0 +1,229 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.ClaudeCanonicalEvaluationStrategy = void 0;
+ class ClaudeCanonicalEvaluationStrategy {
+ async convertToQualifireEvaluationRequest(request, response) {
+ const { messages: requestMessages, available_tools: requestAvailableTools, } = this.convertRequest(request);
+ const messages = requestMessages || [];
+ const availableTools = requestAvailableTools || [];
+ // Avoid undefined response
+ if (!response) {
+ return {
+ messages,
+ available_tools: availableTools,
+ };
+ }
+ // Check if response is streaming or non-streaming
+ if (Array.isArray(response)) {
+ const streamingResultMessages = await this.handleStreaming(response);
+ messages.push(...streamingResultMessages);
+ }
+ else {
+ const nonStreamingResultMessages = await this.handleNonStreamingResponse(response);
+ messages.push(...nonStreamingResultMessages);
+ }
+ return {
+ messages,
+ available_tools: availableTools,
+ };
+ }
+ convertRequest(request) {
+ const messages = [];
+ const availableTools = [];
+ // Handle Claude system message first (if present)
+ if (request?.system) {
+ messages.push({
+ role: 'system',
+ content: request.system,
+ });
+ }
+ // Handle Claude request messages
+ if (request?.messages) {
+ messages.push(...this.convertClaudeMessagesToLLMMessages(request.messages));
+ }
+ // Handle tools
+ if (request?.tools) {
+ for (const tool of request.tools) {
+ availableTools.push({
+ name: tool.name,
+ description: tool.description,
+ parameters: tool.input_schema?.properties || {},
+ });
+ }
+ }
+ return {
+ messages,
+ available_tools: availableTools,
+ };
+ }
+ async handleStreaming(response) {
+ const messages = [];
+ let role;
+ let accumulatedContent = [];
+ let accumulatedToolName;
+ let accumulatedToolId;
+ let accumulatedToolInput = [];
+ for (const responseEvent of response) {
+ switch (responseEvent.type) {
+ case 'message_start':
+ const rawMessageStartEvent = responseEvent;
+ role = rawMessageStartEvent.message.role;
+ accumulatedContent = [];
+ accumulatedToolName = undefined;
+ accumulatedToolId = undefined;
+ accumulatedToolInput = [];
+ break;
+ case 'content_block_start':
+ const rawContentBlockStartEvent = responseEvent;
+ switch (rawContentBlockStartEvent.content_block.type) {
+ case 'text':
+ const textBlock = rawContentBlockStartEvent.content_block;
+ accumulatedContent.push(textBlock.text);
+ break;
+ case 'tool_use':
+ const toolUseBlock = rawContentBlockStartEvent.content_block;
+ accumulatedToolId = toolUseBlock.id;
+ accumulatedToolName = toolUseBlock.name;
+ accumulatedToolInput = [];
+ break;
+ case 'thinking':
+ const thinkingBlock = rawContentBlockStartEvent.content_block;
+ accumulatedContent.push(thinkingBlock.thinking);
+ break;
+ default:
+ console.debug(`Invalid content block type: ${responseEvent}`);
+ }
+ break;
+ case 'content_block_delta':
+ const rawContentBlockDeltaEvent = responseEvent;
+ switch (rawContentBlockDeltaEvent.delta.type) {
+ case 'text_delta':
+ const textDelta = rawContentBlockDeltaEvent.delta;
+ accumulatedContent.push(textDelta.text);
+ break;
+ case 'input_json_delta':
+ const inputJsonDelta = rawContentBlockDeltaEvent.delta;
+ accumulatedToolInput.push(inputJsonDelta.partial_json);
+ break;
+ default:
+ console.debug(`Invalid delta type: ${rawContentBlockDeltaEvent}`);
+ }
+ break;
+ case 'message_stop':
+ let finalContent;
+ if (accumulatedContent.length > 0) {
+ finalContent = accumulatedContent.join('').trim();
+ }
+ let finalTool;
+ if (accumulatedToolName) {
+ finalTool = {
+ id: accumulatedToolId,
+ name: accumulatedToolName,
+ arguments: JSON.parse(accumulatedToolInput.join('')),
+ };
+ }
+ ;
+ if (!role) {
+ console.debug('role was not set');
+ continue;
+ }
+ messages.push({
+ role: role == 'model' ? 'assistant' : role,
+ content: finalContent ?? undefined,
+ tool_calls: finalTool ? [finalTool] : undefined,
+ });
+ role = undefined;
+ accumulatedContent = [];
+ accumulatedToolName = undefined;
+ accumulatedToolId = undefined;
+ accumulatedToolInput = [];
+ break;
+ case 'content_block_stop':
+ case 'message_delta':
+ break;
+ default:
+ console.debug(`Invalid event: ${responseEvent}`);
+ }
+ }
+ return messages;
+ }
+ async handleNonStreamingResponse(response) {
+ const messages = [];
+ if (response.role !== 'assistant') {
+ throw new Error(`Response role must be 'assistant'. Make sure to use response
+ from anthropic.messages.create() when not using streaming.`);
+ }
+ messages.push(...this.convertClaudeMessagesToLLMMessages([response]));
+ return messages;
+ }
+ // Claude-specific function to convert Response API messages to LLM messages
+ convertClaudeMessagesToLLMMessages(messages) {
+ const extractedMessages = [];
+ for (const message of messages) {
+ if (typeof message.content === 'string') {
+ const llmMessage = {
+ role: message.role,
+ content: message.content,
+ };
+ extractedMessages.push(llmMessage);
+ continue;
+ }
+ const aggregatedContent = [];
+ const aggregatedToolCalls = [];
+ let role = message.role;
+ if (!message.content) {
+ continue;
+ }
+ for (const part of message.content) {
+ switch (part.type) {
+ case 'tool_use':
+ const toolUseBlock = part;
+ aggregatedToolCalls.push({
+ name: toolUseBlock.name,
+ arguments: toolUseBlock.input,
+ id: toolUseBlock.id,
+ });
+ break;
+ case 'tool_result':
+ role = 'tool'; // Claude expects the 'user' role for tool results, but Qualifire treats tool results as coming from the 'tool' role
+ const toolResultBlock = part;
+ if (typeof toolResultBlock.content === 'string') {
+ aggregatedContent.push(toolResultBlock.content);
+ }
+ else {
+ toolResultBlock.content.filter(part => part.type === 'text').forEach(part => {
+ const textPart = part;
+ aggregatedContent.push(textPart.text);
+ });
+ }
+ break;
+ case 'text':
+ const textBlock = part;
+ aggregatedContent.push(textBlock.text);
+ break;
+ default:
+ console.debug('Invalid Claude output: message - ' +
+ JSON.stringify(message) +
+ ' part - ' +
+ JSON.stringify(part));
+ }
+ }
+ // If we accumulated aggregatedContent or aggregatedToolCalls, add the message
+ if (aggregatedContent.length > 0 || aggregatedToolCalls.length > 0) {
+ const accumulatedMessage = {
+ role,
+ };
+ if (aggregatedContent.length > 0) {
+ accumulatedMessage.content = aggregatedContent.join('');
+ }
+ // Only add aggregatedToolCalls property for assistant messages
+ if (aggregatedToolCalls.length > 0) {
+ accumulatedMessage.tool_calls = aggregatedToolCalls;
+ }
+ extractedMessages.push(accumulatedMessage);
+ }
+ }
+ return extractedMessages;
+ }
+ }
+ exports.ClaudeCanonicalEvaluationStrategy = ClaudeCanonicalEvaluationStrategy;
@@ -0,0 +1,11 @@
+ import { EvaluationProxyAPIRequest } from '../../types';
+ import { CanonicalEvaluationStrategy } from '../canonical';
+ type GeminiAICanonicalEvaluationStrategyResponse = any;
+ type GeminiAICanonicalEvaluationStrategyRequest = any;
+ export declare class GeminiAICanonicalEvaluationStrategy implements CanonicalEvaluationStrategy<GeminiAICanonicalEvaluationStrategyRequest, GeminiAICanonicalEvaluationStrategyResponse> {
+ convertToQualifireEvaluationRequest(request: GeminiAICanonicalEvaluationStrategyRequest, response: GeminiAICanonicalEvaluationStrategyResponse): Promise<EvaluationProxyAPIRequest>;
+ convertRequest(request: GeminiAICanonicalEvaluationStrategyRequest): Promise<EvaluationProxyAPIRequest>;
+ private handleNonStreamingResponse;
+ private handleStreaming;
+ }
+ export {};
@@ -0,0 +1,241 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.GeminiAICanonicalEvaluationStrategy = void 0;
+ class GeminiAICanonicalEvaluationStrategy {
+ async convertToQualifireEvaluationRequest(request, response) {
+ const { messages: requestMessages, available_tools: requestAvailableTools, } = await this.convertRequest(request);
+ const messages = requestMessages || [];
+ const available_tools = requestAvailableTools || [];
+ // Avoid undefined response
+ if (!response) {
+ return {
+ messages,
+ available_tools,
+ };
+ }
+ if (Array.isArray(response)) {
+ const streamingResultMessages = await this.handleStreaming(response);
+ messages.push(...streamingResultMessages);
+ }
+ else {
+ const nonStreamingResultMessages = await this.handleNonStreamingResponse(response);
+ messages.push(...nonStreamingResultMessages);
+ }
+ return {
+ messages,
+ available_tools,
+ };
+ }
+ async convertRequest(request) {
+ const messages = [];
+ const available_tools = [];
+ // Handle available tools
+ if (request?.config?.tools && request.config.tools.length > 0) {
+ for (const tool of request.config.tools) {
+ if (tool?.functionDeclarations?.length > 0) {
+ for (const functionDeclaration of tool.functionDeclarations) {
+ available_tools.push({
+ name: functionDeclaration.name,
+ description: functionDeclaration.description,
+ parameters: functionDeclaration.parameters,
+ });
+ }
+ }
+ }
+ }
+ const contents = [];
+ if (request?.config?.systemInstruction) {
+ /*
+ Gemini request.contents is an object called contentListUnions which can be
+ list of parts or content objects:
+ https://github.com/googleapis/js-genai/blob/b5d77e1bfea5c6b4903bc7ade986e91d6e146835/src/types.ts#L1937
+ */
+ const convertedSystemContent = convertContentListUnionsToContentList(request.config.systemInstruction, 'system');
+ if (convertedSystemContent.length != 1) {
+ throw new Error(`Invalid system instruction given. Gemini Does not support multiple system instructions: ${JSON.stringify(request.contents)}`);
+ }
+ contents.push(convertedSystemContent[0]);
+ }
+ // Handle request contents
+ if (request?.contents) {
+ const convertedContents = convertContentListUnionsToContentList(request.contents, 'user');
+ contents.push(...convertedContents);
+ }
+ for (const content of contents) {
+ const message = convertContentToLLMMessage(content);
+ if (message) {
+ messages.push(message);
+ }
+ }
+ return {
+ messages,
+ available_tools,
+ };
+ }
+ async handleNonStreamingResponse(response) {
+ const messages = [];
+ // Handle response candidates
+ if (response?.candidates && response.candidates.length > 0) {
+ if (response.candidates.length > 1) {
+ console.debug('Multiple candidates found in the response. Only first candidate is supported.');
+ }
+ const firstCandidate = response.candidates[0];
+ if (firstCandidate.content?.role &&
+ firstCandidate.content?.parts &&
+ firstCandidate.content.parts.length > 0) {
+ const message = convertContentToLLMMessage(firstCandidate.content);
+ if (message) {
+ messages.push(message);
+ }
+ }
+ }
+ return messages;
+ }
+ async handleStreaming(response) {
+ const messages = [];
+ const accumulatedContentParts = [];
+ let currentRole = 'assistant';
+ const toolCalls = [];
+ for (const chunk of response) {
+ if (chunk?.candidates && chunk.candidates.length > 0) {
+ if (chunk.candidates.length > 1) {
+ console.debug('Multiple candidates found in the response. Only first candidate is supported.');
+ }
+ const firstCandidate = chunk.candidates[0]; // we currently only support one response message
+ if (firstCandidate.content?.role) {
+ currentRole = firstCandidate.content.role;
+ if (currentRole === 'model') {
+ currentRole = 'assistant'; // Answers returned from the model are always from the assistant role
+ }
+ }
+ if (!firstCandidate.content) {
+ console.debug('Content is missing required fields. Skipping message.');
+ continue;
+ }
+ const message = convertContentToLLMMessage(firstCandidate.content);
+ if (message?.content) {
+ accumulatedContentParts.push(message.content);
+ }
+ if (message?.tool_calls) {
+ toolCalls.push(...message.tool_calls);
+ }
+ }
+ }
+ const accumulatedContent = accumulatedContentParts.length > 0
+ ? accumulatedContentParts.join('').trim()
+ : undefined;
+ if (accumulatedContent || toolCalls.length > 0) {
+ messages.push({
+ role: currentRole,
+ content: accumulatedContent,
+ tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
+ });
+ }
+ return messages;
+ }
+ }
+ exports.GeminiAICanonicalEvaluationStrategy = GeminiAICanonicalEvaluationStrategy;
+ // Helper function to convert a Gemini content object (with multiple parts) to LLMMessage
+ function convertContentToLLMMessage(content) {
+ if (!content.parts || content.parts.length === 0) {
+ return null;
+ }
+ // In Gemini role is optional, but by default the api is changing it to 'user' when no role is provided
+ let role = content.role || 'user';
+ const textContent = [];
+ const tool_calls = [];
+ // Process all parts and aggregate them
+ for (const part of content.parts) {
+ if (typeof part === 'string') {
+ textContent.push(part);
+ }
+ else if (part.text) {
+ textContent.push(part.text);
+ }
+ else if (part.functionCall) {
+ role = 'assistant'; // Function calls are always from assistant
+ tool_calls.push({
+ name: part.functionCall.name,
+ arguments: part.functionCall.args,
+ });
+ }
+ else if (part.functionResponse?.response?.result) {
+ role = 'tool';
+ textContent.push(JSON.stringify(part.functionResponse.response?.result));
+ }
+ }
+ // Determine final role
+ const finalRole = role === 'model' ? 'assistant' : role;
+ // Aggregate content based on message type
+ let finalContent;
+ if (textContent.length > 0) {
+ finalContent = textContent.join(' ');
+ }
+ return {
+ role: finalRole,
+ content: finalContent,
+ tool_calls: tool_calls.length > 0 ? tool_calls : undefined,
+ };
+ }
+ function isPartOrString(obj) {
+ if (typeof obj === 'string') {
+ return true;
+ }
+ if (typeof obj === 'object' && obj !== null) {
+ return ('fileData' in obj ||
+ 'text' in obj ||
+ 'functionCall' in obj ||
+ 'functionResponse' in obj ||
+ 'inlineData' in obj ||
+ 'videoMetadata' in obj ||
+ 'codeExecutionResult' in obj ||
+ 'executableCode' in obj);
+ }
+ return false;
+ }
+ function isContent(obj) {
+ if (typeof obj === 'object' && obj !== null) {
+ return 'parts' in obj || 'role' in obj;
+ }
+ return false;
+ }
+ function convertContentListUnionsToContentList(input, defaultRole) {
+ /*
+ Gemini request.contents is an object called contentListUnions which can be
+ list of parts or content objects:
+ https://github.com/googleapis/js-genai/blob/b5d77e1bfea5c6b4903bc7ade986e91d6e146835/src/types.ts#L1937
+ */
+ let inputs;
+ if (!Array.isArray(input)) {
+ inputs = [input];
+ }
+ else {
+ inputs = input;
+ }
+ if (inputs.length === 0) {
+ return [];
+ }
+ let convertedContents = [];
+ if (inputs.every(isContent)) {
+ convertedContents = inputs;
+ }
+ else if (inputs.every(isPartOrString)) {
+ const partInputs = [];
+ for (const partOrString of inputs) {
+ if (typeof partOrString === 'string') {
+ partInputs.push({ text: partOrString });
+ }
+ else {
+ partInputs.push(partOrString);
+ }
+ }
+ convertedContents.push({
+ role: defaultRole,
+ parts: partInputs,
+ });
+ }
+ else {
+ throw new Error(`Invalid contents given. Gemini Does not support mixing parts and contents: ${JSON.stringify(inputs)}`);
+ }
+ return convertedContents;
+ }
@@ -0,0 +1,28 @@
+ import { EvaluationProxyAPIRequest, LLMToolDefinition } from '../../types';
+ import { CanonicalEvaluationStrategy } from '../canonical';
+ import { ChatCompletion, ChatCompletionChunk } from 'openai/resources/chat/completions';
+ import { ChatCompletionCreateParamsBase, ChatCompletionCreateParamsNonStreaming, ChatCompletionCreateParamsStreaming } from 'openai/resources/chat/completions/completions';
+ import { Response, ResponseCreateParamsBase, ResponseCreateParamsNonStreaming, ResponseCreateParamsStreaming, ResponseStreamEvent } from 'openai/resources/responses/responses';
+ type OpenAIResponseCreateRequest = ResponseCreateParamsNonStreaming | ResponseCreateParamsStreaming | ResponseCreateParamsBase;
+ type OpenAIResponseCreateRequestResponse = Response | Array<ResponseStreamEvent>;
+ type OpenAIChatCompletionsCreateRequest = ChatCompletionCreateParamsNonStreaming | ChatCompletionCreateParamsStreaming | ChatCompletionCreateParamsBase;
+ type OpenAIChatCompletionsCreateResponse = ChatCompletion | Array<ChatCompletionChunk>;
+ type OpenAICanonicalEvaluationStrategyRequest = OpenAIResponseCreateRequest | OpenAIChatCompletionsCreateRequest;
+ type OpenAICanonicalEvaluationStrategyResponse = OpenAIResponseCreateRequestResponse | OpenAIChatCompletionsCreateResponse;
+ export declare class OpenAICanonicalEvaluationStrategy implements CanonicalEvaluationStrategy<OpenAICanonicalEvaluationStrategyRequest, OpenAICanonicalEvaluationStrategyResponse> {
+ convertToQualifireEvaluationRequest(request: OpenAICanonicalEvaluationStrategyRequest, response: OpenAICanonicalEvaluationStrategyResponse): Promise<EvaluationProxyAPIRequest>;
+ convertRequest(request: any): Promise<EvaluationProxyAPIRequest>;
+ convertRequestForChatCompletions(request: OpenAIChatCompletionsCreateRequest): Promise<EvaluationProxyAPIRequest>;
+ private convertRequestMessagesForChatCompletions;
+ convertRequestForResponsesAPI(request: OpenAIResponseCreateRequest): Promise<EvaluationProxyAPIRequest>;
+ private handleStreaming;
+ private handleChatCompletionsStreaming;
+ private handleResponsesApiStreaming;
+ private processChatCompletionsChunk;
+ private processResponsesApiChunkMessage;
+ private processResponsesApiChunkToolCall;
+ private handleChatCompletionsNonStreaming;
+ private handleResponsesApiNonStreaming;
+ }
+ export declare function convertToolsToLLMDefinitions(tools: unknown[]): LLMToolDefinition[];
+ export {};
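
Taken together, these additions introduce provider-specific canonical strategies (Claude, Gemini, OpenAI) that each normalize a request/response pair into a single EvaluationProxyAPIRequest via convertToQualifireEvaluationRequest. Below is a minimal usage sketch, not part of the package diff: the import path and the assumption that ClaudeCanonicalEvaluationStrategy is re-exported from the package root are guesses; only the class and method names come from the hunks above.

import Anthropic from '@anthropic-ai/sdk';
import type { MessageCreateParams } from '@anthropic-ai/sdk/resources';
// Assumed import path; the diff does not show the package's public export surface.
import { ClaudeCanonicalEvaluationStrategy } from 'qualifire';

async function buildEvaluationRequest(client: Anthropic) {
  const request: MessageCreateParams = {
    model: 'claude-3-5-sonnet-latest',
    max_tokens: 256,
    messages: [{ role: 'user', content: 'Summarize this ticket for the on-call engineer.' }],
  };
  // Non-streaming call, so the response is a single Message object.
  const response = await client.messages.create(request);

  // Normalize the Anthropic request/response pair into the canonical shape.
  const strategy = new ClaudeCanonicalEvaluationStrategy();
  const evaluationRequest = await strategy.convertToQualifireEvaluationRequest(request, response);

  // evaluationRequest.messages and evaluationRequest.available_tools now hold the
  // flattened conversation and tool definitions expected by the Qualifire evaluation proxy.
  return evaluationRequest;
}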