qualifire 1.2.2 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,4 +1,4 @@
- Qualifire
+ # Qualifire SDK
 
  [![CodeQL](https://github.com/qualifire-dev/qualifire-typescript-sdk/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/qualifire-dev/qualifire-typescript-sdk/actions/workflows/codeql-analysis.yml)
  [![Release](https://github.com/qualifire-dev/qualifire-typescript-sdk/actions/workflows/release.yml/badge.svg)](https://github.com/qualifire-dev/qualifire-typescript-sdk/actions/workflows/release.yml)
@@ -7,9 +7,7 @@ Qualifire
  [![Commitizen Friendly][commitizen-img]][commitizen-url]
  [![Semantic Release][semantic-release-img]][semantic-release-url]
 
- # Qualifire SDK
-
- This is the official SDK for interacting with the Qualifire API.
+ The official TypeScript SDK for evaluating LLM outputs with [Qualifire](https://qualifire.ai). Detect hallucinations, prompt injections, PII leakage, content policy violations, and more.
 
  ## Installation
 
@@ -17,79 +15,238 @@ This is the official SDK for interacting with the Qualifire API.
  npm install qualifire
  ```
 
- ## usage
+ ## Quick Start
+
+ ```typescript
+ import { Qualifire } from 'qualifire';
+ import OpenAI from 'openai';
+
+ const qualifire = new Qualifire({ apiKey: 'your-api-key' });
+ const openai = new OpenAI();
+
+ // Make your LLM call
+ const request = {
+   model: 'gpt-4o',
+   messages: [
+     { role: 'system', content: 'You are a helpful assistant.' },
+     { role: 'user', content: 'What is the capital of France?' },
+   ],
+ };
+
+ const response = await openai.chat.completions.create(request);
 
- First, import the `Qualifire` class from the SDK:
+ // Evaluate the response
+ const evaluation = await qualifire.evaluate({
+   framework: 'openai',
+   request,
+   response,
+   hallucinationsCheck: true,
+   groundingCheck: true,
+ });
 
- ```javascript
- import { Qualifire } from 'qualifire-sdk';
+ console.log(evaluation);
+ // {
+ //   status: 'passed',
+ //   score: 100,
+ //   evaluationResults: [...]
+ // }
  ```
 
- Then, create a new instance of the Qualifire class, passing your API key and the base URL of the Qualifire API:
+ ## Supported Frameworks
 
- ```javascript
- const qualifire = new Qualifire({
-   apiKey: 'your-api-key',
+ | Framework | Value | SDK |
+ |-----------|-------|-----|
+ | OpenAI | `openai` | `openai` (Chat Completions & Responses API) |
+ | Anthropic Claude | `claude` | `@anthropic-ai/sdk` |
+ | Google Gemini | `gemini` | `@google/genai` |
+ | Vercel AI SDK | `vercelai` | `ai` |
+
+ All frameworks support both streaming and non-streaming responses.
+
+ ## Available Evaluation Checks
+
+ | Check | Parameter | Description |
+ |-------|-----------|-------------|
+ | Hallucinations | `hallucinationsCheck` | Detect fabricated information |
+ | Grounding | `groundingCheck` | Verify responses are grounded in context |
+ | Prompt Injections | `promptInjections` | Detect prompt injection attempts |
+ | PII Detection | `piiCheck` | Identify personally identifiable information |
+ | Content Moderation | `contentModerationCheck` | Flag harmful content |
+ | Instructions Following | `instructionsFollowingCheck` | Verify adherence to system instructions |
+ | Tool Selection Quality | `toolSelectionQualityCheck` | Evaluate tool/function call accuracy |
+ | Custom Assertions | `assertions` | Array of custom assertion strings |
+
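A minimal sketch of combining several of these checks with custom `assertions` in one call, assuming the flags compose the same way as in the Quick Start example above (the assertion strings are illustrative and not taken from the package):

```typescript
// Sketch: several checks plus custom assertions on the Quick Start request/response.
// Parameter names come from the table above; combining them in one call is assumed.
const evaluation = await qualifire.evaluate({
  framework: 'openai',
  request,
  response,
  hallucinationsCheck: true,
  piiCheck: true,
  contentModerationCheck: true,
  assertions: ['The answer names exactly one city', 'The answer contains no PII'],
});

if (evaluation.status === 'failed') {
  console.warn(`Evaluation failed with score ${evaluation.score}`);
}
```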
+ ## Framework Examples
+
+ ### OpenAI
+
+ ```typescript
+ // Chat Completions API
+ const request = {
+   model: 'gpt-4o',
+   messages: [{ role: 'user', content: 'Hello!' }],
+ };
+ const response = await openai.chat.completions.create(request);
+
+ await qualifire.evaluate({
+   framework: 'openai',
+   request,
+   response,
+   hallucinationsCheck: true,
+ });
+
+ // Streaming
+ const streamRequest = { ...request, stream: true };
+ const stream = await openai.chat.completions.create(streamRequest);
+
+ const chunks = [];
+ for await (const chunk of stream) {
+   chunks.push(chunk);
+ }
+
+ await qualifire.evaluate({
+   framework: 'openai',
+   request: streamRequest,
+   response: chunks,
+   hallucinationsCheck: true,
  });
  ```
 
- ℹ️ There are default environment variables if you prefer to set it that way `QUALIFIRE_API_KEY`
+ ### Anthropic Claude
 
- You can now use the `evaluate` method to evaluate input and output data:
+ ```typescript
+ import Anthropic from '@anthropic-ai/sdk';
 
- ```javascript
- const input = {
-   model: 'gpt-3.5-turbo',
-   messages: [
-     {
-       role: 'user',
-       content: 'this is my awesome request',
-     },
-   ],
+ const anthropic = new Anthropic();
+
+ const request = {
+   model: 'claude-sonnet-4-20250514',
+   max_tokens: 1024,
+   messages: [{ role: 'user', content: 'Hello!' }],
  };
+ const response = await anthropic.messages.create(request);
 
- const output = await openai.chat.completions.create(input);
+ await qualifire.evaluate({
+   framework: 'claude',
+   request,
+   response,
+   promptInjections: true,
+ });
+ ```
+
+ ### Google Gemini
+
+ ```typescript
+ import { GoogleGenAI } from '@google/genai';
+
+ const genai = new GoogleGenAI({ apiKey: 'your-key' });
+
+ const request = {
+   model: 'gemini-2.0-flash',
+   contents: [{ role: 'user', parts: [{ text: 'Hello!' }] }],
+ };
+ const response = await genai.models.generateContent(request);
+
+ await qualifire.evaluate({
+   framework: 'gemini',
+   request,
+   response,
+   contentModerationCheck: true,
+ });
+ ```
+
+ ### Vercel AI SDK
+
+ ```typescript
+ import { generateText } from 'ai';
+ import { openai } from '@ai-sdk/openai';
+
+ const request = {
+   model: openai('gpt-4o'),
+   prompt: 'Hello!',
+ };
+ const response = await generateText(request);
 
- const evaluationResponse = await qualifire.evaluate(input, output); // This will block until the evaluation is done
- console.log(evaluationResponse);
+ await qualifire.evaluate({
+   framework: 'vercelai',
+   request,
+   response,
+   piiCheck: true,
+ });
  ```
 
- ### Non-blocking execution
+ ## Direct Message Mode
 
- In case you want to trigger a completely async evaluation (to view in qualifire's UI) simply add the `{async: true}` option to your call.
+ For cases where you don't use a supported framework, pass messages directly:
 
- ```javascript
- const input = {
-   model: 'gpt-3.5-turbo',
+ ```typescript
+ await qualifire.evaluate({
    messages: [
-     {
-       role: 'user',
-       content: 'this is my awesome request',
-     },
+     { role: 'user', content: 'What is 2+2?' },
+     { role: 'assistant', content: 'The answer is 4.' },
    ],
- };
+   hallucinationsCheck: true,
+   groundingCheck: true,
+ });
+ ```
+
+ ## Invoke Pre-configured Evaluations
 
- const output = await openai.chat.completions.create(input);
+ Run evaluations configured in the Qualifire dashboard:
 
- const evaluationResponse = await qualifire.evaluate(input, output, {
-   async: true,
- }); // This will block until the evaluation is done
- console.log(evaluationResponse);
+ ```typescript
+ const result = await qualifire.invokeEvaluation({
+   input: 'What is the capital of France?',
+   output: 'Paris is the capital of France.',
+   evaluationId: 'eval-123',
+ });
  ```
 
- Evaluates the input and output using the Qualifire API. Returns a promise that resolves to the evaluation response, or undefined if async is true.
+ ## Configuration
 
+ ### Constructor Options
+
+ ```typescript
+ const qualifire = new Qualifire({
+   apiKey: 'your-api-key', // Required (or set QUALIFIRE_API_KEY env var)
+   baseUrl: 'https://...', // Optional, defaults to https://proxy.qualifire.ai
+ });
+ ```
+
+ ### Environment Variables
+
+ | Variable | Description |
+ |----------|-------------|
+ | `QUALIFIRE_API_KEY` | API key for authentication |
+ | `QUALIFIRE_BASE_URL` | Override the API base URL |
+
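If these variables are set, the client can presumably be constructed without passing credentials explicitly; a minimal sketch assuming the documented environment-variable fallback:

```typescript
// Assumes QUALIFIRE_API_KEY (and optionally QUALIFIRE_BASE_URL) are set in the
// environment, per the constructor comment above. Whether the options object
// may be omitted entirely is an assumption, not stated in the README.
const qualifire = new Qualifire();
```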
+ ## Response Format
+
+ ```typescript
+ interface EvaluationResponse {
+   status: 'passed' | 'failed';
+   score: number; // 0-100
+   evaluationResults: Array<{
+     type: string;
+     results: Array<{
+       name: string;
+       score: number;
+       label: string;
+       confidence_score: number;
+       reason: string;
+     }>;
+   }>;
+ }
+ ```
 
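A sketch of consuming this shape; the helper below is illustrative and assumes `evaluate` resolves to an `EvaluationResponse` as declared above:

```typescript
// Illustrative helper (not part of the SDK): log each individual check result
// and throw when the overall evaluation failed.
function reportEvaluation(evaluation: EvaluationResponse): void {
  for (const group of evaluation.evaluationResults) {
    for (const result of group.results) {
      console.log(
        `${group.type}/${result.name}: ${result.label} ` +
          `(score ${result.score}, confidence ${result.confidence_score}) - ${result.reason}`
      );
    }
  }
  if (evaluation.status === 'failed') {
    throw new Error(`Qualifire evaluation failed with score ${evaluation.score}`);
  }
}
```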
+ ## License
 
+ MIT
 
- [build-img]: https://github.com/qualifire-dev/develop/qualifire-typescript-sdk/actions/workflows/release.yml/badge.svg
- [build-url]: https://github.com/qualifire-dev/qualifire-typescript-sdk/actions/workflows/release.yml
- [downloads-img]: https://img.shields.io/npm/dt/main/qualifire
- [npm-url]: https://www.npmjs.com/package/qualifire
- [issues-img]: https://img.shields.io/github/issues/qualifire-dev/develop/qualifire-typescript-sdk
+ [issues-img]: https://img.shields.io/github/issues/qualifire-dev/qualifire-typescript-sdk
  [issues-url]: https://github.com/qualifire-dev/qualifire-typescript-sdk/issues
- [codecov-img]: https://codecov.io/gh/qualifire-dev/develop/qualifire-typescript-sdk/branch/main/graph/badge.svg
- [codecov-url]: https://codecov.io/gh/qualifire-dev/develop/qualifire-typescript-sdk
+ [codecov-img]: https://codecov.io/gh/qualifire-dev/qualifire-typescript-sdk/branch/main/graph/badge.svg
+ [codecov-url]: https://codecov.io/gh/qualifire-dev/qualifire-typescript-sdk
  [semantic-release-img]: https://img.shields.io/badge/%20%20%F0%9F%93%A6%F0%9F%9A%80-semantic--release-e10079.svg
  [semantic-release-url]: https://github.com/semantic-release/semantic-release
  [commitizen-img]: https://img.shields.io/badge/commitizen-friendly-brightgreen.svg
@@ -0,0 +1,4 @@
+ import { EvaluationProxyAPIRequest } from '../types';
+ export interface CanonicalEvaluationStrategy<RequestType, ResponseType> {
+   convertToQualifireEvaluationRequest(request: RequestType, response: ResponseType): Promise<EvaluationProxyAPIRequest>;
+ }
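The provider adapters added in this release (the Claude and Gemini strategies below) implement this interface. A hypothetical strategy for some other provider might look like the following sketch; the import paths, the `MyRequest`/`MyResponse` types, and the assumption that `EvaluationProxyAPIRequest` carries `messages` and `available_tools` (as the Claude strategy further down suggests) are all illustrative:

```typescript
import { EvaluationProxyAPIRequest } from '../types';
import { CanonicalEvaluationStrategy } from './canonical';

// Placeholder request/response shapes for a hypothetical provider.
type MyRequest = { prompt: string };
type MyResponse = { text: string };

export class MyProviderCanonicalEvaluationStrategy
  implements CanonicalEvaluationStrategy<MyRequest, MyResponse>
{
  async convertToQualifireEvaluationRequest(
    request: MyRequest,
    response: MyResponse
  ): Promise<EvaluationProxyAPIRequest> {
    // Map the provider-specific request/response onto the canonical message list.
    return {
      messages: [
        { role: 'user', content: request.prompt },
        { role: 'assistant', content: response.text },
      ],
      available_tools: [],
    };
  }
}
```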
@@ -0,0 +1,2 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,15 @@
+ import { Message, type MessageCreateParams, type MessageStreamParams } from '@anthropic-ai/sdk/resources';
+ import { RawMessageStreamEvent } from '@anthropic-ai/sdk/resources/messages';
+ import { EvaluationProxyAPIRequest } from '../../types';
+ import { CanonicalEvaluationStrategy } from '../canonical';
+ type AnthropicCreateAPIResponsesType = Message | RawMessageStreamEvent;
+ type AnthropicAPIRequestsType = MessageCreateParams;
+ type AnthropicAPIResponsesType = AnthropicCreateAPIResponsesType | MessageStreamParams;
+ export declare class ClaudeCanonicalEvaluationStrategy implements CanonicalEvaluationStrategy<AnthropicAPIRequestsType, AnthropicAPIResponsesType> {
+   convertToQualifireEvaluationRequest(request: AnthropicAPIRequestsType, response: AnthropicAPIResponsesType): Promise<EvaluationProxyAPIRequest>;
+   convertRequest(request: any): EvaluationProxyAPIRequest;
+   private handleStreaming;
+   private handleNonStreamingResponse;
+   private convertClaudeMessagesToLLMMessages;
+ }
+ export {};
@@ -0,0 +1,229 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.ClaudeCanonicalEvaluationStrategy = void 0;
+ class ClaudeCanonicalEvaluationStrategy {
+   async convertToQualifireEvaluationRequest(request, response) {
+     const { messages: requestMessages, available_tools: requestAvailableTools, } = this.convertRequest(request);
+     const messages = requestMessages || [];
+     const availableTools = requestAvailableTools || [];
+     // Avoid undefined response
+     if (!response) {
+       return {
+         messages,
+         available_tools: availableTools,
+       };
+     }
+     // Check if response is streaming or non-streaming
+     if (Array.isArray(response)) {
+       const streamingResultMessages = await this.handleStreaming(response);
+       messages.push(...streamingResultMessages);
+     }
+     else {
+       const nonStreamingResultMessages = await this.handleNonStreamingResponse(response);
+       messages.push(...nonStreamingResultMessages);
+     }
+     return {
+       messages,
+       available_tools: availableTools,
+     };
+   }
+   convertRequest(request) {
+     const messages = [];
+     const availableTools = [];
+     // Handle Claude system message first (if present)
+     if (request?.system) {
+       messages.push({
+         role: 'system',
+         content: request.system,
+       });
+     }
+     // Handle Claude request messages
+     if (request?.messages) {
+       messages.push(...this.convertClaudeMessagesToLLMMessages(request.messages));
+     }
+     // Handle tools
+     if (request?.tools) {
+       for (const tool of request.tools) {
+         availableTools.push({
+           name: tool.name,
+           description: tool.description,
+           parameters: tool.input_schema?.properties || {},
+         });
+       }
+     }
+     return {
+       messages,
+       available_tools: availableTools,
+     };
+   }
+   async handleStreaming(response) {
+     const messages = [];
+     let role;
+     let accumulatedContent = [];
+     let accumulatedToolName;
+     let accumulatedToolId;
+     let accumulatedToolInput = [];
+     for (const responseEvent of response) {
+       switch (responseEvent.type) {
+         case 'message_start':
+           const rawMessageStartEvent = responseEvent;
+           role = rawMessageStartEvent.message.role;
+           accumulatedContent = [];
+           accumulatedToolName = undefined;
+           accumulatedToolId = undefined;
+           accumulatedToolInput = [];
+           break;
+         case 'content_block_start':
+           const rawContentBlockStartEvent = responseEvent;
+           switch (rawContentBlockStartEvent.content_block.type) {
+             case 'text':
+               const textBlock = rawContentBlockStartEvent.content_block;
+               accumulatedContent.push(textBlock.text);
+               break;
+             case 'tool_use':
+               const toolUseBlock = rawContentBlockStartEvent.content_block;
+               accumulatedToolId = toolUseBlock.id;
+               accumulatedToolName = toolUseBlock.name;
+               accumulatedToolInput = [];
+               break;
+             case 'thinking':
+               const thinkingBlock = rawContentBlockStartEvent.content_block;
+               accumulatedContent.push(thinkingBlock.thinking);
+               break;
+             default:
+               console.debug(`Invalid content block type: ${responseEvent}`);
+           }
+           break;
+         case 'content_block_delta':
+           const rawContentBlockDeltaEvent = responseEvent;
+           switch (rawContentBlockDeltaEvent.delta.type) {
+             case 'text_delta':
+               const textDelta = rawContentBlockDeltaEvent.delta;
+               accumulatedContent.push(textDelta.text);
+               break;
+             case 'input_json_delta':
+               const inputJsonDelta = rawContentBlockDeltaEvent.delta;
+               accumulatedToolInput.push(inputJsonDelta.partial_json);
+               break;
+             default:
+               console.debug(`Invalid delta type: ${rawContentBlockDeltaEvent}`);
+           }
+           break;
+         case 'message_stop':
+           let finalContent;
+           if (accumulatedContent.length > 0) {
+             finalContent = accumulatedContent.join('').trim();
+           }
+           let finalTool;
+           if (accumulatedToolName) {
+             finalTool = {
+               id: accumulatedToolId,
+               name: accumulatedToolName,
+               arguments: JSON.parse(accumulatedToolInput.join('')),
+             };
+           }
+           ;
+           if (!role) {
+             console.debug('role was not set');
+             continue;
+           }
+           messages.push({
+             role: role == 'model' ? 'assistant' : role,
+             content: finalContent ?? undefined,
+             tool_calls: finalTool ? [finalTool] : undefined,
+           });
+           role = undefined;
+           accumulatedContent = [];
+           accumulatedToolName = undefined;
+           accumulatedToolId = undefined;
+           accumulatedToolInput = [];
+           break;
+         case 'content_block_stop':
+         case 'message_delta':
+           break;
+         default:
+           console.debug(`Invalid event: ${responseEvent}`);
+       }
+     }
+     return messages;
+   }
+   async handleNonStreamingResponse(response) {
+     const messages = [];
+     if (response.role !== 'assistant') {
+       throw new Error(`Response role must be 'assistant'. Make sure to use response
+ from anthropic.messages.create() when not using streaming.`);
+     }
+     messages.push(...this.convertClaudeMessagesToLLMMessages([response]));
+     return messages;
+   }
+   // Claude-specific function to convert Response API messages to LLM messages
+   convertClaudeMessagesToLLMMessages(messages) {
+     const extractedMessages = [];
+     for (const message of messages) {
+       if (typeof message.content === 'string') {
+         const llmMessage = {
+           role: message.role,
+           content: message.content,
+         };
+         extractedMessages.push(llmMessage);
+         continue;
+       }
+       const aggregatedContent = [];
+       const aggregatedToolCalls = [];
+       let role = message.role;
+       if (!message.content) {
+         continue;
+       }
+       for (const part of message.content) {
+         switch (part.type) {
+           case 'tool_use':
+             const toolUseBlock = part;
+             aggregatedToolCalls.push({
+               name: toolUseBlock.name,
+               arguments: toolUseBlock.input,
+               id: toolUseBlock.id,
+             });
+             break;
+           case 'tool_result':
+             role = 'tool'; // Claude sends tool results under the 'user' role, but Qualifire treats tool results as coming from the 'tool' role
+             const toolResultBlock = part;
+             if (typeof toolResultBlock.content === 'string') {
+               aggregatedContent.push(toolResultBlock.content);
+             }
+             else {
+               toolResultBlock.content.filter(part => part.type === 'text').forEach(part => {
+                 const textPart = part;
+                 aggregatedContent.push(textPart.text);
+               });
+             }
+             break;
+           case 'text':
+             const textBlock = part;
+             aggregatedContent.push(textBlock.text);
+             break;
+           default:
+             console.debug('Invalid Claude output: message - ' +
+               JSON.stringify(message) +
+               ' part - ' +
+               JSON.stringify(part));
+         }
+       }
+       // If we accumulated aggregatedContent or aggregatedToolCalls, add the message
+       if (aggregatedContent.length > 0 || aggregatedToolCalls.length > 0) {
+         const accumulatedMessage = {
+           role,
+         };
+         if (aggregatedContent.length > 0) {
+           accumulatedMessage.content = aggregatedContent.join('');
+         }
+         // Only add aggregatedToolCalls property for assistant messages
+         if (aggregatedToolCalls.length > 0) {
+           accumulatedMessage.tool_calls = aggregatedToolCalls;
+         }
+         extractedMessages.push(accumulatedMessage);
+       }
+     }
+     return extractedMessages;
+   }
+ }
+ exports.ClaudeCanonicalEvaluationStrategy = ClaudeCanonicalEvaluationStrategy;
@@ -0,0 +1,11 @@
+ import { EvaluationProxyAPIRequest } from '../../types';
+ import { CanonicalEvaluationStrategy } from '../canonical';
+ type GeminiAICanonicalEvaluationStrategyResponse = any;
+ type GeminiAICanonicalEvaluationStrategyRequest = any;
+ export declare class GeminiAICanonicalEvaluationStrategy implements CanonicalEvaluationStrategy<GeminiAICanonicalEvaluationStrategyRequest, GeminiAICanonicalEvaluationStrategyResponse> {
+   convertToQualifireEvaluationRequest(request: GeminiAICanonicalEvaluationStrategyRequest, response: GeminiAICanonicalEvaluationStrategyResponse): Promise<EvaluationProxyAPIRequest>;
+   convertRequest(request: GeminiAICanonicalEvaluationStrategyRequest): Promise<EvaluationProxyAPIRequest>;
+   private handleNonStreamingResponse;
+   private handleStreaming;
+ }
+ export {};