deepeval 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +162 -0
- package/dist/annotation/api.d.ts +15 -0
- package/dist/annotation/api.js +8 -0
- package/dist/annotation/index.d.ts +3 -0
- package/dist/annotation/index.js +36 -0
- package/dist/annotation/utils.d.ts +2 -0
- package/dist/annotation/utils.js +34 -0
- package/dist/confident/api.d.ts +40 -0
- package/dist/confident/api.js +206 -0
- package/dist/confident/evaluate.d.ts +11 -0
- package/dist/confident/evaluate.js +160 -0
- package/dist/confident/index.d.ts +6 -0
- package/dist/confident/index.js +24 -0
- package/dist/confident/types.d.ts +13 -0
- package/dist/confident/types.js +2 -0
- package/dist/config/settings.d.ts +11 -0
- package/dist/config/settings.js +30 -0
- package/dist/constants.d.ts +4 -0
- package/dist/constants.js +7 -0
- package/dist/dataset/api.d.ts +15 -0
- package/dist/dataset/api.js +2 -0
- package/dist/dataset/dataset.d.ts +54 -0
- package/dist/dataset/dataset.js +289 -0
- package/dist/dataset/golden.d.ts +61 -0
- package/dist/dataset/golden.js +65 -0
- package/dist/dataset/index.d.ts +7 -0
- package/dist/dataset/index.js +23 -0
- package/dist/dataset/utils.d.ts +9 -0
- package/dist/dataset/utils.js +116 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.js +68 -0
- package/dist/integrations/ai-sdk/index.d.ts +29 -0
- package/dist/integrations/ai-sdk/index.js +121 -0
- package/dist/integrations/ai-sdk/processor.d.ts +17 -0
- package/dist/integrations/ai-sdk/processor.js +260 -0
- package/dist/integrations/index.d.ts +2 -0
- package/dist/integrations/index.js +7 -0
- package/dist/integrations/langchain/callback-handler.d.ts +36 -0
- package/dist/integrations/langchain/callback-handler.js +236 -0
- package/dist/integrations/langchain/index.d.ts +1 -0
- package/dist/integrations/langchain/index.js +5 -0
- package/dist/integrations/langchain/patch-tool.d.ts +1 -0
- package/dist/integrations/langchain/patch-tool.js +56 -0
- package/dist/integrations/langchain/utils.d.ts +49 -0
- package/dist/integrations/langchain/utils.js +266 -0
- package/dist/metrics/base-metrics.d.ts +30 -0
- package/dist/metrics/base-metrics.js +36 -0
- package/dist/models/base-model.d.ts +34 -0
- package/dist/models/base-model.js +27 -0
- package/dist/models/index.d.ts +1 -0
- package/dist/models/index.js +5 -0
- package/dist/openai/extractor.d.ts +9 -0
- package/dist/openai/extractor.js +140 -0
- package/dist/openai/index.d.ts +2 -0
- package/dist/openai/index.js +12 -0
- package/dist/openai/patch.d.ts +3 -0
- package/dist/openai/patch.js +147 -0
- package/dist/openai/types.d.ts +15 -0
- package/dist/openai/types.js +2 -0
- package/dist/openai/utils.d.ts +7 -0
- package/dist/openai/utils.js +174 -0
- package/dist/prompt/index.d.ts +61 -0
- package/dist/prompt/index.js +301 -0
- package/dist/prompt/types.d.ts +51 -0
- package/dist/prompt/types.js +157 -0
- package/dist/prompt/utils.d.ts +20 -0
- package/dist/prompt/utils.js +175 -0
- package/dist/simulate/index.d.ts +29 -0
- package/dist/simulate/index.js +176 -0
- package/dist/telemetry.d.ts +13 -0
- package/dist/telemetry.js +322 -0
- package/dist/test-case/index.d.ts +1 -0
- package/dist/test-case/index.js +12 -0
- package/dist/test-case/llm-test-case.d.ts +120 -0
- package/dist/test-case/llm-test-case.js +181 -0
- package/dist/test-case/utils.d.ts +13 -0
- package/dist/test-case/utils.js +33 -0
- package/dist/tracing/api.d.ts +91 -0
- package/dist/tracing/api.js +16 -0
- package/dist/tracing/index.d.ts +4 -0
- package/dist/tracing/index.js +19 -0
- package/dist/tracing/logging.d.ts +12 -0
- package/dist/tracing/logging.js +44 -0
- package/dist/tracing/offline-evals/api.d.ts +7 -0
- package/dist/tracing/offline-evals/api.js +17 -0
- package/dist/tracing/offline-evals/index.d.ts +3 -0
- package/dist/tracing/offline-evals/index.js +9 -0
- package/dist/tracing/offline-evals/span.d.ts +4 -0
- package/dist/tracing/offline-evals/span.js +18 -0
- package/dist/tracing/offline-evals/thread.d.ts +4 -0
- package/dist/tracing/offline-evals/thread.js +19 -0
- package/dist/tracing/offline-evals/trace.d.ts +4 -0
- package/dist/tracing/offline-evals/trace.js +18 -0
- package/dist/tracing/trace-context.d.ts +26 -0
- package/dist/tracing/trace-context.js +59 -0
- package/dist/tracing/tracing.d.ts +328 -0
- package/dist/tracing/tracing.js +1085 -0
- package/dist/tracing/utils.d.ts +11 -0
- package/dist/tracing/utils.js +45 -0
- package/dist/utils.d.ts +22 -0
- package/dist/utils.js +84 -0
- package/package.json +135 -0
package/README.md
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# DeepEval.ts
|
|
2
|
+
|
|
3
|
+
TypeScript client for Confident AI's DeepEval API - a framework for evaluating and testing Large Language Models (LLMs).
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install deepeval-ts
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Authentication
|
|
12
|
+
|
|
13
|
+
DeepEval.ts requires a Confident AI API key to authenticate with the service. You can set up your API key in one of the following ways:
|
|
14
|
+
|
|
15
|
+
### Option 1: Environment Variables
|
|
16
|
+
|
|
17
|
+
Set the `CONFIDENT_API_KEY` environment variable:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# In your terminal
|
|
21
|
+
export CONFIDENT_API_KEY="your-api-key-here"
|
|
22
|
+
|
|
23
|
+
# Or for Windows (Command Prompt)
|
|
24
|
+
set CONFIDENT_API_KEY=your-api-key-here
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### Option 2: .env File
|
|
28
|
+
|
|
29
|
+
Create a `.env` file in your project root:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
# .env file
|
|
33
|
+
CONFIDENT_API_KEY="your-api-key-here"
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Then use a package like `dotenv` to load it:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
npm install dotenv
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
```javascript
|
|
43
|
+
// At the top of your entry file
|
|
44
|
+
import 'dotenv/config';
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Option 3: Pass API Key Directly
|
|
48
|
+
|
|
49
|
+
You can also pass your API key directly when creating an API instance:
|
|
50
|
+
|
|
51
|
+
```typescript
|
|
52
|
+
import { Api } from 'deepeval-ts';
|
|
53
|
+
|
|
54
|
+
const api = new Api("your-api-key-here");
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Usage Examples
|
|
58
|
+
|
|
59
|
+
### Working with Datasets
|
|
60
|
+
|
|
61
|
+
```typescript
|
|
62
|
+
import { EvaluationDataset, LLMTestCase } from 'deepeval-ts';
|
|
63
|
+
import * as path from 'path';
|
|
64
|
+
|
|
65
|
+
// Load dataset from CSV
|
|
66
|
+
const dataset = new EvaluationDataset();
|
|
67
|
+
await dataset.addTestCasesFromCsvFile(
|
|
68
|
+
'path/to/dataset.csv',
|
|
69
|
+
'input_column',
|
|
70
|
+
'actual_output_column',
|
|
71
|
+
'expected_output_column'
|
|
72
|
+
);
|
|
73
|
+
|
|
74
|
+
// Create dataset programmatically
|
|
75
|
+
const customDataset = new EvaluationDataset();
|
|
76
|
+
customDataset.addTestCase(
|
|
77
|
+
new LLMTestCase({
|
|
78
|
+
input: "What is the capital of France?",
|
|
79
|
+
actualOutput: "Paris is the capital of France.",
|
|
80
|
+
expectedOutput: "Paris"
|
|
81
|
+
})
|
|
82
|
+
);
|
|
83
|
+
|
|
84
|
+
// Iterate through test cases
|
|
85
|
+
for (const testCase of dataset.testCases) {
|
|
86
|
+
console.log(`Input: ${testCase.input}`);
|
|
87
|
+
console.log(`Output: ${testCase.actualOutput}`);
|
|
88
|
+
}
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## API Reference
|
|
92
|
+
|
|
93
|
+
### EvaluationDataset
|
|
94
|
+
|
|
95
|
+
The `EvaluationDataset` class manages collections of test cases for LLM evaluation.
|
|
96
|
+
|
|
97
|
+
```typescript
|
|
98
|
+
// Create a new dataset
|
|
99
|
+
const dataset = new EvaluationDataset();
|
|
100
|
+
|
|
101
|
+
// Add test cases from CSV
|
|
102
|
+
await dataset.addTestCasesFromCsvFile(
|
|
103
|
+
filePath, // Path to CSV file
|
|
104
|
+
inputColumn, // Name of input column
|
|
105
|
+
actualOutputColumn, // Name of actual output column
|
|
106
|
+
expectedOutputColumn, // Name of expected output column (optional)
|
|
107
|
+
contextColumn, // Name of context column (optional)
|
|
108
|
+
contextDelimiter, // Delimiter for context values (optional)
|
|
109
|
+
retrievalContextColumn, // Name of retrieval context column (optional)
|
|
110
|
+
retrievalContextDelimiter // Delimiter for retrieval context values (optional)
|
|
111
|
+
);
|
|
112
|
+
|
|
113
|
+
// Add a test case programmatically
|
|
114
|
+
dataset.addTestCase(
|
|
115
|
+
new LLMTestCase({
|
|
116
|
+
input: "What is the capital of France?",
|
|
117
|
+
actualOutput: "Paris is the capital of France.",
|
|
118
|
+
expectedOutput: "Paris",
|
|
119
|
+
context: ["France is a country in Europe.", "Paris is a city."],
|
|
120
|
+
retrievalContext: ["Paris is the capital and most populous city of France."]
|
|
121
|
+
})
|
|
122
|
+
);
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### LLMTestCase
|
|
126
|
+
|
|
127
|
+
The `LLMTestCase` class represents individual test cases for LLM evaluation.
|
|
128
|
+
|
|
129
|
+
```typescript
|
|
130
|
+
const testCase = new LLMTestCase({
|
|
131
|
+
input: "What is the capital of France?",
|
|
132
|
+
actualOutput: "Paris is the capital of France.",
|
|
133
|
+
expectedOutput: "Paris",
|
|
134
|
+
context: ["France is a country in Europe.", "Paris is a city."],
|
|
135
|
+
retrievalContext: ["Paris is the capital and most populous city of France."],
|
|
136
|
+
toolCalls: [
|
|
137
|
+
{
|
|
138
|
+
name: "search",
|
|
139
|
+
input: { query: "capital of France" },
|
|
140
|
+
output: { result: "Paris is the capital of France" }
|
|
141
|
+
}
|
|
142
|
+
]
|
|
143
|
+
});
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Development
|
|
147
|
+
|
|
148
|
+
To build the package locally:
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
npm run build
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
To run tests:
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
npm test
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
## License
|
|
161
|
+
|
|
162
|
+
[MIT](LICENSE)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
 * Rating scales an annotation can be expressed in.
 */
export declare enum AnnotationType {
    /** Thumbs up/down; the annotation validator reports "must be either 0 or 1" for out-of-range ratings. */
    THUMBS_RATING = "THUMBS_RATING",
    /** Star rating; the annotation validator requires a rating between 1 and 5. */
    FIVE_STAR_RATING = "FIVE_STAR_RATING"
}
|
|
5
|
+
/**
 * Payload describing one annotation sent to the annotation endpoint.
 *
 * The annotation validator requires exactly one of `traceUuid`, `spanUuid`
 * or `threadId` to be set.
 */
export interface APIAnnotation {
    /** Numeric score; the range the validator accepts depends on `type`. */
    rating: number;
    /** Target trace. */
    traceUuid?: string;
    /** Target span. */
    spanUuid?: string;
    /** Target thread. */
    threadId?: string;
    /** Rejected by the validator when `threadId` is set. */
    expectedOutput?: string;
    /** Rejected by the validator unless `threadId` is set. */
    expectedOutcome?: string;
    /** Free-text explanation accompanying the rating. */
    explanation?: string;
    /** Rating scale used for `rating`. */
    type?: AnnotationType;
    /** Identifier of the annotating user — presumably; confirm against the API. */
    userId?: string;
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.AnnotationType = void 0;
|
|
4
|
+
// Runtime object backing the AnnotationType string enum: every member maps
// its own name to the identical string value.
var AnnotationType;
if (!AnnotationType) {
    exports.AnnotationType = AnnotationType = {};
}
for (const memberName of ["THUMBS_RATING", "FIVE_STAR_RATING"]) {
    AnnotationType[memberName] = memberName;
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
14
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
|
+
};
|
|
16
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
+
exports.sendAnnotation = void 0;
|
|
18
|
+
const utils_1 = require("./utils");
|
|
19
|
+
const confident_1 = require("../confident");
|
|
20
|
+
__exportStar(require("./api"), exports);
|
|
21
|
+
/**
 * Validates an annotation and POSTs it to the annotation endpoint.
 *
 * Only the nine known annotation fields are forwarded; any other property on
 * the argument is dropped. The request goes through an `Api` constructed
 * without arguments.
 *
 * @param annotation Object carrying the annotation fields.
 * @returns Promise that settles once the request has completed.
 * @throws Whatever `validateAPIAnnotation` or `Api.sendRequest` throws.
 */
const sendAnnotation = async (annotation) => {
    const { rating, traceUuid, spanUuid, threadId, expectedOutput, expectedOutcome, explanation, type, userId, } = annotation;
    const candidate = {
        rating,
        traceUuid,
        spanUuid,
        threadId,
        expectedOutput,
        expectedOutcome,
        explanation,
        type,
        userId,
    };
    const validated = (0, utils_1.validateAPIAnnotation)(candidate);
    const client = new confident_1.Api();
    await client.sendRequest(confident_1.HttpMethods.POST, confident_1.Endpoints.ANNOTATION_ENDPOINT, validated);
};
exports.sendAnnotation = sendAnnotation;
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.validateAPIAnnotation = void 0;
|
|
4
|
+
const api_1 = require("./api");
|
|
5
|
+
/**
 * Checks an annotation payload and returns it unchanged when it is valid.
 *
 * Rules enforced:
 *  - exactly one of `traceUuid`, `spanUuid`, `threadId` must be set;
 *  - a FIVE_STAR_RATING rating must lie in [1, 5];
 *  - a THUMBS_RATING rating must be exactly 0 or 1;
 *  - `expectedOutput` is not allowed with `threadId`;
 *  - `expectedOutcome` is only allowed with `threadId`.
 *
 * @param apiAnnotation Annotation payload to validate.
 * @returns The same `apiAnnotation` object.
 * @throws Error describing the first rule that is violated.
 */
const validateAPIAnnotation = (apiAnnotation) => {
    const targetIds = [
        apiAnnotation.traceUuid,
        apiAnnotation.spanUuid,
        apiAnnotation.threadId,
    ];
    const idCount = targetIds.filter(Boolean).length;
    if (idCount > 1) {
        throw new Error("Only one of 'traceUuid', 'spanUuid', or 'threadId' should be provided.");
    }
    if (idCount === 0) {
        throw new Error("One of 'traceUuid', 'spanUuid', or 'threadId' must be provided.");
    }
    const { type, rating } = apiAnnotation;
    if (rating !== undefined) {
        // Written as a negated "in range" test so that NaN, for which every
        // comparison is false, is rejected instead of slipping through.
        if (type === api_1.AnnotationType.FIVE_STAR_RATING &&
            !(rating >= 1 && rating <= 5)) {
            throw new Error("Five star rating must be between 1 and 5.");
        }
        // A thumbs rating is binary: fractions such as 0.5 are not valid even
        // though they lie between 0 and 1.
        if (type === api_1.AnnotationType.THUMBS_RATING &&
            rating !== 0 &&
            rating !== 1) {
            throw new Error("Thumbs rating must be either 0 or 1.");
        }
    }
    if (apiAnnotation.threadId && apiAnnotation.expectedOutput) {
        throw new Error("Expected output cannot be provided for threads.");
    }
    if (!apiAnnotation.threadId && apiAnnotation.expectedOutcome) {
        throw new Error("Expected outcome cannot be provided for traces or spans.");
    }
    return apiAnnotation;
};
|
|
34
|
+
exports.validateAPIAnnotation = validateAPIAnnotation;
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/** Production DeepEval base URL. */
export declare const DEEPEVAL_BASE_URL = "https://deepeval.confident-ai.com";
/**
 * Confident AI API base URL. The implementation selects it from the
 * `CONFIDENT_REGION` environment variable when the module is loaded.
 */
export declare const API_BASE_URL: string;
|
|
3
|
+
/** HTTP verbs accepted by `Api.sendRequest`. */
export declare enum HttpMethods {
    GET = "GET",
    POST = "POST",
    DELETE = "DELETE",
    PUT = "PUT"
}
|
|
9
|
+
/**
 * Path templates appended to the API base URL.
 *
 * Segments written as `:name` are placeholders; `Api.sendRequest` substitutes
 * them from its `urlParams` argument.
 */
export declare enum Endpoints {
    DATASET_ENDPOINT = "/v1/datasets",
    TEST_RUN_ENDPOINT = "/v1/test-run",
    TRACING_ENDPOINT = "/v1/tracing",
    TRACES_ENDPOINT = "/v1/traces",
    EVENT_ENDPOINT = "/v1/event",
    FEEDBACK_ENDPOINT = "/v1/feedback",
    PROMPTS_VERSION_ID_ENDPOINT = "/v1/prompts/:alias/versions/:version",
    PROMPTS_LABEL_ENDPOINT = "/v1/prompts/:alias/labels/:label",
    PROMPTS_COMMITS_ENDPOINT = "/v1/prompts/:alias/commits",
    PROMPTS_COMMIT_HASH_ENDPOINT = "/v1/prompts/:alias/commits/:hash",
    PROMPTS_VERSIONS_ENDPOINT = "/v1/prompts/:alias/versions",
    PROMPTS_ENDPOINT = "/v1/prompts",
    RECOMMEND_ENDPOINT = "/v1/recommend-metrics",
    EVALUATE_ENDPOINT = "/v1/evaluate",
    EVALUATE_THREAD_ENDPOINT = "/v1/evaluate/threads/:threadId",
    EVALUATE_TRACE_ENDPOINT = "/v1/evaluate/traces/:traceUuid",
    EVALUATE_SPAN_ENDPOINT = "/v1/evaluate/spans/:spanUuid",
    GUARD_ENDPOINT = "/guard",
    GUARDRAILS_ENDPOINT = "/guardrails",
    BASELINE_ATTACKS_ENDPOINT = "/generate-baseline-attacks",
    SIMULATE_ENDPOINT = "/v1/simulate",
    ANNOTATION_ENDPOINT = "/v1/annotations"
}
|
|
33
|
+
/**
 * HTTP client for the Confident AI API.
 */
export declare class Api {
    private apiKey;
    private headers;
    private baseApiUrl;
    /**
     * @param apiKey - API key. When omitted, the implementation reads the
     *   `CONFIDENT_API_KEY` environment variable and throws if neither is set.
     * @param baseUrl - Base URL to use instead of the one the implementation
     *   derives from the region setting or the API key prefix.
     */
    constructor(apiKey?: string, baseUrl?: string);
    private static httpRequest;
    /**
     * Sends one request to the API.
     *
     * @param method - HTTP verb.
     * @param endpoint - Endpoint path or template.
     * @param body - Request payload.
     * @param params - Query-string parameters.
     * @param endpointString - When provided, used as the path instead of `endpoint`.
     * @param urlParams - Values substituted for `:name` placeholders in the path.
     * @returns The response payload.
     */
    sendRequest(method: HttpMethods, endpoint: Endpoints | string, body?: any, params?: Record<string, any>, endpointString?: string, urlParams?: Record<string, string>): Promise<any>;
}
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Default-import interop: ES modules are returned untouched, anything else is
// wrapped so that the value is reachable as `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.Api = exports.Endpoints = exports.HttpMethods = exports.API_BASE_URL = exports.DEEPEVAL_BASE_URL = void 0;
|
|
7
|
+
const axios_1 = __importDefault(require("axios"));
|
|
8
|
+
const utils_1 = require("../utils");
|
|
9
|
+
const readline_1 = require("readline");
|
|
10
|
+
// Region codes recognised in the CONFIDENT_REGION environment variable; each
// member maps its own name to the identical string.
var Regions;
Regions = Regions || {};
for (const regionCode of ["EU", "US", "AU"]) {
    Regions[regionCode] = regionCode;
}
|
|
16
|
+
const _LOCAL_API_BASE_URL = "http://localhost:3001";
|
|
17
|
+
const _LOCAL_DEEPEVAL_BASE_URL = "http://0.0.0.0:8000";
|
|
18
|
+
const PROD_DEEPEVAL_BASE_URL = "https://deepeval.confident-ai.com";
|
|
19
|
+
const CONFIDENT_BASE_URL = "https://api.confident-ai.com";
|
|
20
|
+
const CONFIDENT_BASE_URL_EU = "https://eu.api.confident-ai.com";
|
|
21
|
+
const CONFIDENT_BASE_URL_AU = "https://au.api.confident-ai.com";
|
|
22
|
+
const region = process.env.CONFIDENT_REGION || Regions.US;
|
|
23
|
+
exports.DEEPEVAL_BASE_URL = PROD_DEEPEVAL_BASE_URL;
|
|
24
|
+
exports.API_BASE_URL = region === Regions.EU
|
|
25
|
+
? CONFIDENT_BASE_URL_EU
|
|
26
|
+
: region === Regions.AU
|
|
27
|
+
? CONFIDENT_BASE_URL_AU
|
|
28
|
+
: CONFIDENT_BASE_URL;
|
|
29
|
+
/**
 * Picks the API host from the API key's prefix.
 *
 * @param apiKey API key string.
 * @returns The EU host for keys starting with "confident_eu_", the AU host for
 *   keys starting with "confident_au_", and the default host otherwise.
 */
const inferBaseUrlFromApiKey = (apiKey) => {
    const regionalPrefixes = [
        ["confident_eu_", CONFIDENT_BASE_URL_EU],
        ["confident_au_", CONFIDENT_BASE_URL_AU],
    ];
    const match = regionalPrefixes.find(([prefix]) => apiKey.startsWith(prefix));
    return match ? match[1] : CONFIDENT_BASE_URL;
};
|
|
38
|
+
// Error codes after which Api.httpRequest retries. A failure is retryable when
// its `code` contains any of these strings.
const RETRYABLE_ERROR_CODES = [
    "ECONNRESET",
    "ETIMEDOUT",
    "ECONNREFUSED",
    "ENOTFOUND",
    "ENETUNREACH",
    "ESOCKETTIMEDOUT",
    "CERT_HAS_EXPIRED",
];
|
|
47
|
+
function logRetryError(error, attempt) {
|
|
48
|
+
console.error(`Confident AI Error: ${error}. Retrying: ${attempt} time(s)...`);
|
|
49
|
+
}
|
|
50
|
+
// Runtime object backing the HttpMethods string enum: every member maps its
// own name to the identical string value.
var HttpMethods;
if (!HttpMethods) {
    exports.HttpMethods = HttpMethods = {};
}
for (const verb of ["GET", "POST", "DELETE", "PUT"]) {
    HttpMethods[verb] = verb;
}
|
|
57
|
+
// Runtime object backing the Endpoints string enum. Values are path templates;
// `:name` segments are placeholders that Api.sendRequest fills from `urlParams`.
var Endpoints;
if (!Endpoints) {
    exports.Endpoints = Endpoints = {};
}
Object.assign(Endpoints, {
    DATASET_ENDPOINT: "/v1/datasets",
    TEST_RUN_ENDPOINT: "/v1/test-run",
    TRACING_ENDPOINT: "/v1/tracing",
    TRACES_ENDPOINT: "/v1/traces",
    EVENT_ENDPOINT: "/v1/event",
    FEEDBACK_ENDPOINT: "/v1/feedback",
    PROMPTS_VERSION_ID_ENDPOINT: "/v1/prompts/:alias/versions/:version",
    PROMPTS_LABEL_ENDPOINT: "/v1/prompts/:alias/labels/:label",
    PROMPTS_COMMITS_ENDPOINT: "/v1/prompts/:alias/commits",
    PROMPTS_COMMIT_HASH_ENDPOINT: "/v1/prompts/:alias/commits/:hash",
    PROMPTS_VERSIONS_ENDPOINT: "/v1/prompts/:alias/versions",
    PROMPTS_ENDPOINT: "/v1/prompts",
    RECOMMEND_ENDPOINT: "/v1/recommend-metrics",
    EVALUATE_ENDPOINT: "/v1/evaluate",
    EVALUATE_THREAD_ENDPOINT: "/v1/evaluate/threads/:threadId",
    EVALUATE_TRACE_ENDPOINT: "/v1/evaluate/traces/:traceUuid",
    EVALUATE_SPAN_ENDPOINT: "/v1/evaluate/spans/:spanUuid",
    GUARD_ENDPOINT: "/guard",
    GUARDRAILS_ENDPOINT: "/guardrails",
    BASELINE_ATTACKS_ENDPOINT: "/generate-baseline-attacks",
    SIMULATE_ENDPOINT: "/v1/simulate",
    ANNOTATION_ENDPOINT: "/v1/annotations",
});
|
|
82
|
+
// Retry policy Api.httpRequest uses when the caller passes no options.
const defaultRetryOptions = {
    // Upper bound on the number of request attempts.
    maxAttempts: 5,
    // Starting value of the backoff delay (presumably milliseconds — confirm against utils.wait).
    initialDelay: 1000,
    // Cap applied to the delay after each increase.
    maxDelay: 10000,
    // Multiplier applied to the delay after every failed attempt.
    factor: 2,
    // When true, each increase is additionally scaled by a random factor in [0.5, 1.5).
    jitter: true,
};
|
|
89
|
+
/**
 * HTTP client for the Confident AI API.
 *
 * Holds the API key, the request headers derived from it and the base URL,
 * and sends requests through axios with retry on transient network errors.
 */
class Api {
    apiKey;
    headers;
    baseApiUrl;
    /**
     * @param apiKey API key; falls back to the CONFIDENT_API_KEY environment variable.
     * @param baseUrl Base URL override. Without it, the URL comes from
     *   CONFIDENT_REGION when that is set, otherwise from the API key prefix.
     * @throws Error when no API key is available.
     */
    constructor(apiKey, baseUrl) {
        if (!apiKey) {
            apiKey = process.env.CONFIDENT_API_KEY;
        }
        if (!apiKey) {
            throw new Error("Please provide a valid Confident AI API Key.");
        }
        // if region is set or url is provided, respect that
        if (!process.env.CONFIDENT_REGION && !baseUrl) {
            this.baseApiUrl = inferBaseUrlFromApiKey(apiKey);
        }
        else {
            this.baseApiUrl = baseUrl || exports.API_BASE_URL;
        }
        this.apiKey = apiKey;
        this.headers = {
            "Content-Type": "application/json",
            CONFIDENT_API_KEY: apiKey,
        };
    }
    /**
     * Performs one axios request, retrying with exponential backoff while the
     * failure carries a code listed in RETRYABLE_ERROR_CODES.
     *
     * @returns The axios response.
     * @throws The last axios error when it is not retryable or attempts are exhausted.
     */
    static async httpRequest(method, url, headers, data, params, options = defaultRetryOptions) {
        let attempt = 0;
        let delay = options.initialDelay;
        while (attempt < options.maxAttempts) {
            try {
                const response = await (0, axios_1.default)({
                    method,
                    url,
                    headers,
                    data,
                    params,
                });
                return response;
            }
            catch (error) {
                attempt++;
                // Check if error is retryable
                const isRetryable = error.code &&
                    RETRYABLE_ERROR_CODES.some((code) => error.code.includes(code));
                if (!isRetryable || attempt >= options.maxAttempts) {
                    throw error;
                }
                logRetryError(error, attempt);
                // Calculate delay with exponential backoff and jitter
                if (options.jitter) {
                    const jitterFactor = Math.random() + 0.5; // Random between 0.5 and 1.5
                    delay = Math.min(delay * options.factor * jitterFactor, options.maxDelay);
                }
                else {
                    delay = Math.min(delay * options.factor, options.maxDelay);
                }
                await (0, utils_1.wait)(delay);
            }
        }
        throw new Error(`Request failed after ${options.maxAttempts} attempts`);
    }
    /**
     * True when the process can ask the user a question: Node's stdin/stdout
     * exist and stdin is a terminal. The TTY check keeps servers and CI jobs
     * from blocking on a prompt nobody can answer.
     */
    static canPromptUser() {
        return (typeof process !== "undefined" &&
            Boolean(process.stdin && process.stdout && process.stdin.isTTY));
    }
    /**
     * Asks the user how to resolve a 409 conflict and retries accordingly.
     *
     * "y" sets `body.overwrite = true` and retries; "c" asks for a new alias,
     * stores it in `body.alias` and retries; anything else aborts.
     *
     * @param serverMessage Conflict message from the response, if any.
     * @param body Request payload; mutated before the retry.
     * @param retry Re-issues the original request with all original arguments.
     * @returns Promise for the retried request's result, or null when aborted.
     */
    promptOnConflict(serverMessage, body, retry) {
        const message = serverMessage || "Conflict occurred.";
        const rl = (0, readline_1.createInterface)({
            input: process.stdin,
            output: process.stdout,
        });
        return new Promise((resolve) => {
            rl.question(`${message} Would you like to overwrite it? [y/N] or change the alias [c]: `, (answer) => {
                const userInput = answer.trim().toLowerCase();
                if (userInput === "y") {
                    rl.close();
                    body.overwrite = true;
                    resolve(retry());
                }
                else if (userInput === "c") {
                    // The interface must stay open for the follow-up question;
                    // it is closed only once the alias has been read.
                    rl.question("Enter a new alias: ", (newAlias) => {
                        rl.close();
                        body.alias = newAlias.trim();
                        // NOTE(review): if the alias is also part of `urlParams`, the
                        // retried URL still carries the old one — confirm with callers.
                        resolve(retry());
                    });
                }
                else {
                    rl.close();
                    console.log("Aborted.");
                    resolve(null);
                }
            });
        });
    }
    /**
     * Sends one request to the API.
     *
     * @param method HTTP verb.
     * @param endpoint Endpoint path or template.
     * @param body Request payload.
     * @param params Query-string parameters.
     * @param endpointString When provided, used as the path instead of `endpoint`.
     * @param urlParams Values substituted for `:name` placeholders in the path.
     * @returns The response payload for any 2xx status; for a 409 conflict on a
     *   request with a body, the outcome of the interactive conflict prompt.
     * @throws Error carrying the server's `error` field, or the transport
     *   error's message, when the request fails.
     */
    async sendRequest(method, endpoint, body, params, endpointString, urlParams) {
        let endpointPath = endpointString || endpoint;
        if (urlParams) {
            for (const [key, value] of Object.entries(urlParams)) {
                endpointPath = endpointPath.replace(`:${key}`, value);
            }
        }
        const url = `${this.baseApiUrl}${endpointPath}`;
        // A retry must target the same URL and query, so every argument is forwarded.
        const retry = () => this.sendRequest(method, endpoint, body, params, endpointString, urlParams);
        let res;
        try {
            res = await Api.httpRequest(method, url, this.headers, body, params);
        }
        catch (error) {
            // axios rejects non-2xx responses by default, so a conflict normally
            // arrives here rather than as a resolved response.
            if (error.response?.status === 409 && body && Api.canPromptUser()) {
                return this.promptOnConflict(error.response.data?.message, body, retry);
            }
            throw new Error(error.response?.data?.error || error.message);
        }
        // Any 2xx status is a success, not only 200.
        if (res.status >= 200 && res.status < 300) {
            return res.data;
        }
        if (res.status === 409 && body) {
            if (Api.canPromptUser()) {
                return this.promptOnConflict(res.data?.message, body, retry);
            }
            console.error("Conflict occurred. Please implement appropriate UI handling for this environment.");
            return null;
        }
        throw new Error(res.data?.error || res.statusText);
    }
}
exports.Api = Api;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { LLMTestCase, ConversationalTestCase } from "../test-case";
|
|
2
|
+
import { Prompt } from "../prompt";
|
|
3
|
+
/**
 * Declared signature of `evaluate`; the implementation is not part of this
 * declaration file.
 *
 * @param params - Evaluation request.
 * @param params.metricCollection - Name of the metric collection to apply
 *   (presumably one defined on Confident AI — confirm against the implementation).
 * @param params.llmTestCases - Single-turn test cases.
 * @param params.conversationalTestCases - Conversational test cases.
 * @param params.hyperparameters - Named values (string, number, boolean or
 *   `Prompt`) supplied with the request.
 * @param params.identifier - Optional identifier string.
 * @returns A promise that resolves with no value.
 */
export declare function evaluate(params: {
    metricCollection: string;
    llmTestCases?: LLMTestCase[];
    conversationalTestCases?: ConversationalTestCase[];
    hyperparameters?: {
        [key: string]: string | number | boolean | Prompt;
    };
    identifier?: string;
}): Promise<void>;
|