deepeval 0.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/README.md +162 -0
  2. package/dist/annotation/api.d.ts +15 -0
  3. package/dist/annotation/api.js +8 -0
  4. package/dist/annotation/index.d.ts +3 -0
  5. package/dist/annotation/index.js +36 -0
  6. package/dist/annotation/utils.d.ts +2 -0
  7. package/dist/annotation/utils.js +34 -0
  8. package/dist/confident/api.d.ts +40 -0
  9. package/dist/confident/api.js +206 -0
  10. package/dist/confident/evaluate.d.ts +11 -0
  11. package/dist/confident/evaluate.js +160 -0
  12. package/dist/confident/index.d.ts +6 -0
  13. package/dist/confident/index.js +24 -0
  14. package/dist/confident/types.d.ts +13 -0
  15. package/dist/confident/types.js +2 -0
  16. package/dist/config/settings.d.ts +11 -0
  17. package/dist/config/settings.js +30 -0
  18. package/dist/constants.d.ts +4 -0
  19. package/dist/constants.js +7 -0
  20. package/dist/dataset/api.d.ts +15 -0
  21. package/dist/dataset/api.js +2 -0
  22. package/dist/dataset/dataset.d.ts +54 -0
  23. package/dist/dataset/dataset.js +289 -0
  24. package/dist/dataset/golden.d.ts +61 -0
  25. package/dist/dataset/golden.js +65 -0
  26. package/dist/dataset/index.d.ts +7 -0
  27. package/dist/dataset/index.js +23 -0
  28. package/dist/dataset/utils.d.ts +9 -0
  29. package/dist/dataset/utils.js +116 -0
  30. package/dist/index.d.ts +13 -0
  31. package/dist/index.js +68 -0
  32. package/dist/integrations/ai-sdk/index.d.ts +29 -0
  33. package/dist/integrations/ai-sdk/index.js +121 -0
  34. package/dist/integrations/ai-sdk/processor.d.ts +17 -0
  35. package/dist/integrations/ai-sdk/processor.js +260 -0
  36. package/dist/integrations/index.d.ts +2 -0
  37. package/dist/integrations/index.js +7 -0
  38. package/dist/integrations/langchain/callback-handler.d.ts +36 -0
  39. package/dist/integrations/langchain/callback-handler.js +236 -0
  40. package/dist/integrations/langchain/index.d.ts +1 -0
  41. package/dist/integrations/langchain/index.js +5 -0
  42. package/dist/integrations/langchain/patch-tool.d.ts +1 -0
  43. package/dist/integrations/langchain/patch-tool.js +56 -0
  44. package/dist/integrations/langchain/utils.d.ts +49 -0
  45. package/dist/integrations/langchain/utils.js +266 -0
  46. package/dist/metrics/base-metrics.d.ts +30 -0
  47. package/dist/metrics/base-metrics.js +36 -0
  48. package/dist/models/base-model.d.ts +34 -0
  49. package/dist/models/base-model.js +27 -0
  50. package/dist/models/index.d.ts +1 -0
  51. package/dist/models/index.js +5 -0
  52. package/dist/openai/extractor.d.ts +9 -0
  53. package/dist/openai/extractor.js +140 -0
  54. package/dist/openai/index.d.ts +2 -0
  55. package/dist/openai/index.js +12 -0
  56. package/dist/openai/patch.d.ts +3 -0
  57. package/dist/openai/patch.js +147 -0
  58. package/dist/openai/types.d.ts +15 -0
  59. package/dist/openai/types.js +2 -0
  60. package/dist/openai/utils.d.ts +7 -0
  61. package/dist/openai/utils.js +174 -0
  62. package/dist/prompt/index.d.ts +61 -0
  63. package/dist/prompt/index.js +301 -0
  64. package/dist/prompt/types.d.ts +51 -0
  65. package/dist/prompt/types.js +157 -0
  66. package/dist/prompt/utils.d.ts +20 -0
  67. package/dist/prompt/utils.js +175 -0
  68. package/dist/simulate/index.d.ts +29 -0
  69. package/dist/simulate/index.js +176 -0
  70. package/dist/telemetry.d.ts +13 -0
  71. package/dist/telemetry.js +322 -0
  72. package/dist/test-case/index.d.ts +1 -0
  73. package/dist/test-case/index.js +12 -0
  74. package/dist/test-case/llm-test-case.d.ts +120 -0
  75. package/dist/test-case/llm-test-case.js +181 -0
  76. package/dist/test-case/utils.d.ts +13 -0
  77. package/dist/test-case/utils.js +33 -0
  78. package/dist/tracing/api.d.ts +91 -0
  79. package/dist/tracing/api.js +16 -0
  80. package/dist/tracing/index.d.ts +4 -0
  81. package/dist/tracing/index.js +19 -0
  82. package/dist/tracing/logging.d.ts +12 -0
  83. package/dist/tracing/logging.js +44 -0
  84. package/dist/tracing/offline-evals/api.d.ts +7 -0
  85. package/dist/tracing/offline-evals/api.js +17 -0
  86. package/dist/tracing/offline-evals/index.d.ts +3 -0
  87. package/dist/tracing/offline-evals/index.js +9 -0
  88. package/dist/tracing/offline-evals/span.d.ts +4 -0
  89. package/dist/tracing/offline-evals/span.js +18 -0
  90. package/dist/tracing/offline-evals/thread.d.ts +4 -0
  91. package/dist/tracing/offline-evals/thread.js +19 -0
  92. package/dist/tracing/offline-evals/trace.d.ts +4 -0
  93. package/dist/tracing/offline-evals/trace.js +18 -0
  94. package/dist/tracing/trace-context.d.ts +26 -0
  95. package/dist/tracing/trace-context.js +59 -0
  96. package/dist/tracing/tracing.d.ts +328 -0
  97. package/dist/tracing/tracing.js +1085 -0
  98. package/dist/tracing/utils.d.ts +11 -0
  99. package/dist/tracing/utils.js +45 -0
  100. package/dist/utils.d.ts +22 -0
  101. package/dist/utils.js +84 -0
  102. package/package.json +135 -0
@@ -0,0 +1,160 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.evaluate = evaluate;
4
+ const api_1 = require("./api");
5
+ const utils_1 = require("../utils");
6
+ const prompt_1 = require("../prompt");
7
/**
 * Copies a conversation turn into a plain object for the evaluate request.
 *
 * Only the fields listed below are carried over; each tool call is likewise
 * reduced to its name, description, reasoning, output and inputParameters.
 *
 * @param {object} turn - Turn to convert.
 * @returns {object} Plain object with role, content, userId,
 *   retrievalContext, toolsCalled and additionalMetadata. `toolsCalled` is
 *   `undefined` when the turn has no (truthy) `toolsCalled` value.
 */
function convertTurn(turn) {
    let convertedTools;
    if (turn.toolsCalled) {
        convertedTools = turn.toolsCalled.map((call) => {
            return {
                name: call.name,
                description: call.description,
                reasoning: call.reasoning,
                output: call.output,
                inputParameters: call.inputParameters,
            };
        });
    }
    const payload = {
        role: turn.role,
        content: turn.content,
        userId: turn.userId,
        retrievalContext: turn.retrievalContext,
        toolsCalled: convertedTools,
        additionalMetadata: turn.additionalMetadata,
    };
    return payload;
}
26
/**
 * Copies an LLM test case into a plain object for the evaluate request.
 *
 * `toolsCalled` and `expectedTools` are each mapped to plain tool-call
 * objects (name, description, reasoning, output, inputParameters), or left
 * `undefined` when the source value is falsy.
 *
 * @param {object} testCase - Test case to convert.
 * @returns {object} Plain object holding the copied test-case fields.
 */
function convertLLMTestCase(testCase) {
    // Shared projection for both tool lists.
    const toPlainTool = (tool) => ({
        name: tool.name,
        description: tool.description,
        reasoning: tool.reasoning,
        output: tool.output,
        inputParameters: tool.inputParameters,
    });
    const mapTools = (tools) => {
        if (!tools) {
            return undefined;
        }
        return tools.map((tool) => toPlainTool(tool));
    };
    const calledTools = mapTools(testCase.toolsCalled);
    const expectedToolList = mapTools(testCase.expectedTools);
    return {
        input: testCase.input,
        actualOutput: testCase.actualOutput,
        expectedOutput: testCase.expectedOutput,
        context: testCase.context,
        retrievalContext: testCase.retrievalContext,
        additionalMetadata: testCase.additionalMetadata,
        comments: testCase.comments,
        toolsCalled: calledTools,
        expectedTools: expectedToolList,
        reasoning: testCase.reasoning,
        tokenCost: testCase.tokenCost,
        completionTime: testCase.completionTime,
        name: testCase.name,
    };
}
61
/**
 * Copies a conversational test case into a plain object for the evaluate
 * request.
 *
 * Every turn is passed through `convertTurn`. The four descriptive string
 * fields are normalised so that any falsy value (including an empty string)
 * is sent as `undefined`.
 *
 * @param {object} testCase - Conversational test case; `turns` must be an array.
 * @returns {object} Plain object with turns, scenario, expectedOutcome,
 *   userDescription and chatbotRole.
 */
function convertConversationalTestCase(testCase) {
    const truthyOrUndefined = (value) => (value ? value : undefined);
    const convertedTurns = testCase.turns.map(convertTurn);
    return {
        turns: convertedTurns,
        scenario: truthyOrUndefined(testCase.scenario),
        expectedOutcome: truthyOrUndefined(testCase.expectedOutcome),
        userDescription: truthyOrUndefined(testCase.userDescription),
        chatbotRole: truthyOrUndefined(testCase.chatbotRole),
    };
}
71
/**
 * Converts user-supplied hyperparameters into the values sent with an
 * evaluate request.
 *
 * - A `Prompt` instance becomes `{ id, type }`. It is pushed first when it has
 *   no hash, its hash is "latest", or it has no type. If that processing
 *   throws, the failure is logged with `console.warn` and the entry becomes
 *   the string "Error processing prompt".
 * - Any other value is converted with `String(value)`.
 *
 * @param {object} hyperparameters - Map of hyperparameter name to value.
 * @returns {Promise<object>} Map of the same keys to processed values.
 */
async function processHyperparameters(hyperparameters) {
    const result = {};
    for (const [name, candidate] of Object.entries(hyperparameters)) {
        // Non-prompt values are simply stringified.
        if (!(candidate instanceof prompt_1.Prompt)) {
            result[name] = String(candidate);
            continue;
        }
        try {
            const needsPush = !candidate.hash || candidate.hash === "latest" || !candidate.type;
            if (needsPush) {
                await candidate.push();
            }
            const id = candidate.hash;
            // Fall back to a type derived from whether a text template is set.
            const type = candidate.type || (candidate.textTemplate !== null ? "TEXT" : "LIST");
            result[name] = { id, type };
        }
        catch (e) {
            console.warn(`Failed to process prompt hyperparameter '${name}':`, e);
            result[name] = "Error processing prompt";
        }
    }
    return result;
}
95
/**
 * Sends test cases to Confident AI to be evaluated against a metric
 * collection, then logs the link returned by the API.
 *
 * Exactly one kind of test case is sent per call: `llmTestCases` when that
 * array is non-empty, otherwise `conversationalTestCases`. An empty
 * `llmTestCases` array no longer shadows a non-empty
 * `conversationalTestCases` array.
 * NOTE(review): when both arrays are non-empty only the LLM test cases are
 * sent (as before) — confirm whether the endpoint accepts both at once.
 *
 * @param {object} params
 * @param {string} params.metricCollection - Name of the metric collection.
 * @param {Array} [params.llmTestCases] - Single-turn test cases.
 * @param {Array} [params.conversationalTestCases] - Multi-turn test cases.
 * @param {object} [params.hyperparameters] - Passed through
 *   `processHyperparameters` before sending.
 * @param {string} [params.identifier] - Forwarded unchanged in the request.
 * @returns {Promise<void>}
 * @throws {Error} When both test-case arrays are missing or empty, when
 *   `isConfident()` is false, or when building/sending the request fails
 *   (the original error is rethrown after logging the elapsed time).
 */
async function evaluate(params) {
    const { metricCollection, llmTestCases, conversationalTestCases, hyperparameters, identifier, } = params;
    /////////////////////////////////////////////////////////
    /// Type Checking
    /////////////////////////////////////////////////////////
    const llmCount = llmTestCases?.length ?? 0;
    const conversationalCount = conversationalTestCases?.length ?? 0;
    if (llmCount === 0 && conversationalCount === 0) {
        throw new Error("You must provide either a non-empty array of 'llmTestCases' or 'conversationalTestCases'");
    }
    if (!(0, utils_1.isConfident)()) {
        throw new Error("To run evaluations on Confident AI, run `deepeval login`.");
    }
    // Decide on length, not truthiness: `[]` is truthy and used to win here.
    const sendLlmTestCases = llmCount > 0;
    const testCaseLength = sendLlmTestCases ? llmCount : conversationalCount;
    ////////////////////////////////////////////////////////
    /// Posting Data
    /////////////////////////////////////////////////////////
    console.log(`Sending ${testCaseLength} test case(s) to Confident AI...`);
    const startTime = performance.now();
    const elapsedSeconds = () => ((performance.now() - startTime) / 1000).toFixed(2);
    try {
        const api = new api_1.Api(undefined, api_1.API_BASE_URL);
        let processedHyperparameters;
        if (hyperparameters) {
            processedHyperparameters = await processHyperparameters(hyperparameters);
        }
        let confidentRequestData;
        if (sendLlmTestCases) {
            confidentRequestData = {
                metricCollection,
                llmTestCases: llmTestCases.map(convertLLMTestCase),
                hyperparameters: processedHyperparameters,
                identifier,
            };
        }
        else {
            // Validation above guarantees this array is non-empty here.
            confidentRequestData = {
                metricCollection,
                conversationalTestCases: conversationalTestCases.map(convertConversationalTestCase),
                hyperparameters: processedHyperparameters,
                identifier,
            };
        }
        const result = await api.sendRequest(api_1.HttpMethods.POST, api_1.Endpoints.EVALUATE_ENDPOINT, confidentRequestData);
        const timeTaken = elapsedSeconds();
        if (result) {
            const response = {
                link: result.link,
            };
            console.log(`Done! (${timeTaken}s)`);
            console.log(`✓ Evaluation of metric collection '${metricCollection}' started! View progress on ${response.link}`);
        }
    }
    catch (error) {
        console.error(`Error! (${elapsedSeconds()}s)`);
        throw error;
    }
}
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Exports for the confident module
3
+ */
4
+ export * from "./api";
5
+ export * from "./types";
6
+ export { evaluate } from "./evaluate";
@@ -0,0 +1,24 @@
1
+ "use strict";
2
// Compiler-emitted re-export helper: defines property `k2` (defaulting to `k`)
// on `o` so that it mirrors `m[k]`. Where Object.create exists, the binding is
// a live getter unless the source already exposes a usable accessor on an ES
// module; otherwise the value is copied once.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (o, m, k, k2) {
        if (k2 === undefined) {
            k2 = k;
        }
        var existing = Object.getOwnPropertyDescriptor(m, k);
        var mustWrap;
        if (!existing) {
            mustWrap = true;
        }
        else if ("get" in existing) {
            mustWrap = !m.__esModule;
        }
        else {
            mustWrap = existing.writable || existing.configurable;
        }
        var binding = mustWrap
            ? { enumerable: true, get: function () { return m[k]; } }
            : existing;
        Object.defineProperty(o, k2, binding);
    }
    : function (o, m, k, k2) {
        if (k2 === undefined) {
            k2 = k;
        }
        o[k2] = m[k];
    });
13
// Compiler-emitted `export *` helper: re-exports every enumerable key of `m`
// onto `exports`, skipping "default" and any key `exports` already owns.
var __exportStar = (this && this.__exportStar) || function (m, exports) {
    var ownsKey = Object.prototype.hasOwnProperty;
    for (var key in m) {
        if (key === "default") {
            continue;
        }
        if (ownsKey.call(exports, key)) {
            continue;
        }
        __createBinding(exports, m, key);
    }
};
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ exports.evaluate = void 0;
18
+ /**
19
+ * Exports for the confident module
20
+ */
21
+ __exportStar(require("./api"), exports);
22
+ __exportStar(require("./types"), exports);
23
+ var evaluate_1 = require("./evaluate");
24
+ Object.defineProperty(exports, "evaluate", { enumerable: true, get: function () { return evaluate_1.evaluate; } });
@@ -0,0 +1,13 @@
1
+ import { ConversationalTestCase, LLMTestCase } from "../test-case";
2
/**
 * Data describing an evaluation request: the metric collection to run and
 * the test cases (single-turn and/or conversational) to run it on.
 */
export interface ConfidentEvaluateRequestData {
    /** Name of the metric collection to evaluate with. */
    metricCollection: string;
    /** Single-turn test cases. */
    llmTestCases?: Array<LLMTestCase>;
    /** Multi-turn test cases. */
    conversationalTestCases?: Array<ConversationalTestCase>;
    /** Hyperparameter names mapped to string values. */
    hyperparameters?: Record<string, string>;
    /** Optional caller-supplied identifier. */
    identifier?: string;
}
11
+ export interface ConfidentEvaluateResponseData {
12
+ link: string;
13
+ }
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,11 @@
1
+ import { Environment } from "../tracing/utils";
2
+ export interface Settings {
3
+ CONFIDENT_TRACE_ENVIRONMENT?: Environment;
4
+ CONFIDENT_TRACE_VERBOSE?: boolean;
5
+ CONFIDENT_TRACE_SAMPLE_RATE?: number;
6
+ CONFIDENT_OTEL_URL?: string;
7
+ }
8
+ export declare function getSettings(): Settings;
9
+ export declare function resetSettings({ reloadDotenv, }?: {
10
+ reloadDotenv?: boolean;
11
+ }): Settings;
@@ -0,0 +1,30 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.getSettings = getSettings;
4
+ exports.resetSettings = resetSettings;
5
+ const utils_1 = require("../tracing/utils");
6
+ let _settings_singleton = null;
7
/**
 * Returns the process-wide settings object, building it from environment
 * variables on first use and caching it afterwards.
 *
 * - CONFIDENT_TRACE_ENVIRONMENT: env value, or `Environment.DEVELOPMENT`
 *   when unset/empty.
 * - CONFIDENT_TRACE_VERBOSE: `true` when unset; otherwise `true` only for
 *   "yes", "true" or "1" (case-insensitive).
 * - CONFIDENT_TRACE_SAMPLE_RATE: `1.0` when unset; otherwise the parsed
 *   number. A value that does not parse to a finite number falls back to
 *   `1.0` with a warning instead of being stored as `NaN`.
 * - CONFIDENT_OTEL_URL: env value, or "https://otel.confident-ai.com" when
 *   unset/empty.
 *
 * @returns {object} The cached settings object.
 */
function getSettings() {
    if (_settings_singleton === null) {
        const env = process.env;
        const rawVerbose = env.CONFIDENT_TRACE_VERBOSE;
        const rawSampleRate = env.CONFIDENT_TRACE_SAMPLE_RATE;
        let sampleRate = 1.0;
        if (rawSampleRate !== undefined) {
            const parsed = Number.parseFloat(rawSampleRate);
            if (Number.isFinite(parsed)) {
                sampleRate = parsed;
            }
            else {
                // Keep the default rather than propagating NaN to samplers.
                console.warn(`Ignoring invalid CONFIDENT_TRACE_SAMPLE_RATE '${rawSampleRate}'; using 1.0.`);
            }
        }
        _settings_singleton = {
            CONFIDENT_TRACE_ENVIRONMENT: env.CONFIDENT_TRACE_ENVIRONMENT ||
                utils_1.Environment.DEVELOPMENT,
            CONFIDENT_TRACE_VERBOSE: rawVerbose !== undefined
                ? ["yes", "true", "1"].includes(rawVerbose.toLowerCase())
                : true,
            CONFIDENT_TRACE_SAMPLE_RATE: sampleRate,
            CONFIDENT_OTEL_URL: env.CONFIDENT_OTEL_URL ||
                "https://otel.confident-ai.com",
        };
    }
    return _settings_singleton;
}
24
/**
 * Discards the cached settings object and returns a freshly built one.
 *
 * @param {object} [options]
 * @param {boolean} [options.reloadDotenv=false] - Accepted but currently has
 *   no effect; the reload step is not implemented.
 * @returns {object} The newly built settings object from `getSettings()`.
 */
function resetSettings(options = {}) {
    const { reloadDotenv = false } = options;
    // Clear the cache so the getSettings() call below rebuilds from the environment.
    _settings_singleton = null;
    if (reloadDotenv) {
        // TODO
    }
    return getSettings();
}
@@ -0,0 +1,4 @@
1
+ export declare const CONFIDENT_TRACE_VERBOSE = "CONFIDENT_TRACE_VERBOSE";
2
+ export declare const CONFIDENT_TRACE_SAMPLE_RATE = "CONFIDENT_TRACE_SAMPLE_RATE";
3
+ export declare const CONFIDENT_TRACE_ENVIRONMENT = "CONFIDENT_TRACE_ENVIRONMENT";
4
+ export declare const CONFIDENT_TRACING_ENABLED = "CONFIDENT_TRACING_ENABLED";
@@ -0,0 +1,7 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CONFIDENT_TRACING_ENABLED = exports.CONFIDENT_TRACE_ENVIRONMENT = exports.CONFIDENT_TRACE_SAMPLE_RATE = exports.CONFIDENT_TRACE_VERBOSE = void 0;
4
+ exports.CONFIDENT_TRACE_VERBOSE = "CONFIDENT_TRACE_VERBOSE";
5
+ exports.CONFIDENT_TRACE_SAMPLE_RATE = "CONFIDENT_TRACE_SAMPLE_RATE";
6
+ exports.CONFIDENT_TRACE_ENVIRONMENT = "CONFIDENT_TRACE_ENVIRONMENT";
7
+ exports.CONFIDENT_TRACING_ENABLED = "CONFIDENT_TRACING_ENABLED";
@@ -0,0 +1,15 @@
1
+ import { ConversationalGolden, Golden } from "./golden";
2
/**
 * Dataset payload: an alias plus either single-turn goldens or
 * conversational goldens.
 */
export interface APIDataset {
    /** Alias identifying the dataset. */
    alias: string;
    /** Optional overwrite flag. */
    overwrite?: boolean;
    /** Single-turn goldens. */
    goldens?: Array<Golden>;
    /** Conversational goldens (left untyped by the original declaration). */
    conversationalGoldens?: Array<any>;
}
8
+ export interface CreateDatasetHttpResponse {
9
+ link: string;
10
+ }
11
+ export interface DatasetHttpResponse {
12
+ goldens: Golden[];
13
+ conversationalGoldens: ConversationalGolden[];
14
+ id: string;
15
+ }
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,54 @@
1
+ import { ConversationalGolden, Golden } from "./golden";
2
+ import { ConversationalTestCase, LLMTestCase } from "../test-case";
3
+ export type GoldenUnion = Golden | ConversationalGolden;
4
+ export type GoldenUnionArray = Golden[] | ConversationalGolden[];
5
+ export type TestCaseUnion = LLMTestCase | ConversationalTestCase;
6
+ export type TestCaseUnionArray = LLMTestCase[] | ConversationalTestCase[];
7
+ export declare class EvaluationDataset {
8
+ private _multiTurn;
9
+ private _alias;
10
+ private _id;
11
+ private _goldens;
12
+ private _conversationalGoldens;
13
+ private _llmTestCases;
14
+ private _conversationalTestCases;
15
+ constructor(params?: {
16
+ goldens?: GoldenUnionArray;
17
+ });
18
+ toString(): string;
19
+ get goldens(): GoldenUnionArray;
20
+ set goldens(goldens: GoldenUnionArray);
21
+ addGolden(golden: GoldenUnion): void;
22
+ private _addGolden;
23
+ private _addConversationalGolden;
24
+ get testCases(): TestCaseUnionArray;
25
+ set testCases(testCases: TestCaseUnionArray);
26
+ addTestCase(testCase: TestCaseUnion): void;
27
+ pull(params: {
28
+ alias: string;
29
+ finalized?: boolean;
30
+ autoConvertGoldensToTestCases?: boolean;
31
+ }): Promise<void>;
32
+ push(params: {
33
+ alias: string;
34
+ overwrite?: boolean;
35
+ }): Promise<void>;
36
+ queue(params: {
37
+ alias: string;
38
+ goldens: Array<Golden | ConversationalGolden>;
39
+ printResponse?: boolean;
40
+ }): Promise<void>;
41
+ addTestCasesFromCSV({ filePath, inputCol, actualOutputCol, expectedOutputCol, contextCol, contextDelimiter, retrievalContextCol, retrievalContextDelimiter, toolsCalledCol, expectedToolsCol, additionalMetadataCol, }: {
42
+ filePath: string;
43
+ inputCol: string;
44
+ actualOutputCol: string;
45
+ expectedOutputCol?: string;
46
+ contextCol?: string;
47
+ contextDelimiter?: string;
48
+ retrievalContextCol?: string;
49
+ retrievalContextDelimiter?: string;
50
+ toolsCalledCol?: string;
51
+ expectedToolsCol?: string;
52
+ additionalMetadataCol?: string;
53
+ }): Promise<any>;
54
+ }
@@ -0,0 +1,289 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.EvaluationDataset = void 0;
7
+ const node_fs_1 = __importDefault(require("node:fs"));
8
+ const papaparse_1 = __importDefault(require("papaparse"));
9
+ const utils_1 = require("./utils");
10
+ const utils_2 = require("../utils");
11
+ const api_1 = require("../confident/api");
12
+ const golden_1 = require("./golden");
13
+ const test_case_1 = require("../test-case");
14
/**
 * In-memory collection of goldens and test cases that can be pulled from,
 * pushed to, or queued on Confident AI.
 *
 * A dataset is either single-turn (Golden / LLMTestCase) or multi-turn
 * (ConversationalGolden / ConversationalTestCase). The mode is tracked in
 * `_multiTurn` (`null` until the first golden is added or a pull completes)
 * and selects which internal list the `goldens` / `testCases` getters return.
 */
class EvaluationDataset {
    _multiTurn = null;
    _alias = null;
    _id = null;
    _goldens = [];
    _conversationalGoldens = [];
    _llmTestCases = [];
    _conversationalTestCases = [];
    /**
     * @param {object} [params]
     * @param {Array} [params.goldens] - Initial goldens. The type of the first
     *   element decides whether the dataset is multi-turn.
     * @throws {TypeError} When a golden does not match the dataset's mode.
     */
    constructor(params = {}) {
        const goldens = params.goldens ?? [];
        if (goldens.length > 0) {
            this._multiTurn = goldens[0] instanceof golden_1.ConversationalGolden;
        }
        let rank = 0;
        for (const golden of goldens) {
            // Rank is the golden's position, matching how test cases are ranked.
            golden._datasetRank = rank;
            if (this._multiTurn) {
                this._addConversationalGolden(golden);
            }
            else {
                this._addGolden(golden);
            }
            rank += 1;
        }
    }
    /** @returns {string} Debug representation listing test cases, goldens and identity fields. */
    toString() {
        return `${this.constructor.name}(test_cases=${JSON.stringify(this.testCases)}, goldens=${JSON.stringify(this.goldens)}, _alias=${this._alias}, _id=${this._id}, _multi_turn=${this._multiTurn})`;
    }
    ////////////////////////////////////////////////////////
    // Golden Properties
    ////////////////////////////////////////////////////////
    /** @returns {Array} Conversational goldens for a multi-turn dataset, otherwise single-turn goldens. */
    get goldens() {
        return this._multiTurn ? this._conversationalGoldens : this._goldens;
    }
    /**
     * Replaces all goldens. Each golden is stamped with the dataset alias, id
     * and its position as rank. If any golden is rejected, the previous
     * goldens and the previous single/multi-turn mode are restored before the
     * error is rethrown.
     *
     * @throws {TypeError} When an element is neither a Golden nor a
     *   ConversationalGolden, or does not match the dataset's mode.
     */
    set goldens(goldens) {
        const prevGoldens = this._goldens;
        const prevConvGoldens = this._conversationalGoldens;
        const prevMultiTurn = this._multiTurn;
        this._goldens = [];
        this._conversationalGoldens = [];
        try {
            let rank = 0;
            for (const golden of goldens) {
                if (!(golden instanceof golden_1.Golden) &&
                    !(golden instanceof golden_1.ConversationalGolden)) {
                    throw new TypeError("Your goldens must be instances of either ConversationalGolden or Golden");
                }
                golden._datasetAlias = this._alias ?? undefined;
                golden._datasetId = this._id ?? undefined;
                golden._datasetRank = rank;
                if (this._multiTurn) {
                    this._addConversationalGolden(golden);
                }
                else {
                    // addGolden also fixes the mode when it is still undecided.
                    this.addGolden(golden);
                }
                rank += 1;
            }
        }
        catch (e) {
            this._goldens = prevGoldens;
            this._conversationalGoldens = prevConvGoldens;
            // addGolden may have decided the mode mid-way; undo that too.
            this._multiTurn = prevMultiTurn;
            throw e;
        }
    }
    /**
     * Adds one golden. The first golden added to an undecided dataset fixes
     * whether it is multi-turn.
     *
     * @throws {TypeError} When the golden does not match the dataset's mode.
     */
    addGolden(golden) {
        if (this._multiTurn === null) {
            this._multiTurn = golden instanceof golden_1.ConversationalGolden;
        }
        if (this._multiTurn) {
            this._addConversationalGolden(golden);
        }
        else {
            this._addGolden(golden);
        }
    }
    /** Appends a single-turn golden; rejects anything that is not a Golden. */
    _addGolden(golden) {
        if (golden instanceof golden_1.Golden) {
            this._goldens.push(golden);
        }
        else {
            throw new TypeError("You cannot add a multi-turn ConversationalGolden to a single-turn dataset. You can only add a Golden.");
        }
    }
    /** Appends a conversational golden; rejects anything that is not a ConversationalGolden. */
    _addConversationalGolden(golden) {
        if (golden instanceof golden_1.ConversationalGolden) {
            this._conversationalGoldens.push(golden);
        }
        else {
            throw new TypeError("You cannot add a single-turn Golden to a multi-turn dataset. You can only add a ConversationalGolden.");
        }
    }
    ////////////////////////////////////////////////////////
    // Test Case Properties
    ////////////////////////////////////////////////////////
    /** @returns {Array} Conversational test cases for a multi-turn dataset, otherwise LLM test cases. */
    get testCases() {
        return this._multiTurn ? this._conversationalTestCases : this._llmTestCases;
    }
    /**
     * Replaces all test cases. Elements that are neither LLMTestCase nor
     * ConversationalTestCase are skipped. Accepted test cases are stamped with
     * the dataset alias, id and their position within their own kind.
     */
    set testCases(testCases) {
        const llmTestCases = [];
        const conversationalTestCases = [];
        for (const testCase of testCases) {
            if (!(testCase instanceof test_case_1.LLMTestCase) &&
                !(testCase instanceof test_case_1.ConversationalTestCase)) {
                continue;
            }
            testCase._datasetAlias = this._alias ?? undefined;
            testCase._datasetId = this._id ?? undefined;
            if (testCase instanceof test_case_1.LLMTestCase) {
                testCase._datasetRank = llmTestCases.length;
                llmTestCases.push(testCase);
            }
            else if (testCase instanceof test_case_1.ConversationalTestCase) {
                testCase._datasetRank = conversationalTestCases.length;
                conversationalTestCases.push(testCase);
            }
        }
        this._llmTestCases = llmTestCases;
        this._conversationalTestCases = conversationalTestCases;
    }
    /**
     * Appends one test case to the list matching its type and stamps it with
     * the dataset alias, id and rank. Objects of any other type are stamped
     * but not stored.
     */
    addTestCase(testCase) {
        testCase._datasetAlias = this._alias ?? undefined;
        testCase._datasetId = this._id ?? undefined;
        if (testCase instanceof test_case_1.LLMTestCase) {
            testCase._datasetRank = this._llmTestCases.length;
            this._llmTestCases.push(testCase);
        }
        else if (testCase instanceof test_case_1.ConversationalTestCase) {
            testCase._datasetRank = this._conversationalTestCases.length;
            this._conversationalTestCases.push(testCase);
        }
    }
    ////////////////////////////////////////////////////////
    // Push and Pull Methods
    ////////////////////////////////////////////////////////
    /**
     * Fetches a dataset by alias and replaces this dataset's contents.
     *
     * The dataset is treated as multi-turn when the response carries no
     * `goldens` (missing or `null`). With `autoConvertGoldensToTestCases` the
     * fetched goldens are converted and stored as test cases; otherwise they
     * are stored as goldens.
     *
     * @param {object} params
     * @param {string} params.alias - Dataset alias to fetch.
     * @param {boolean} [params.finalized=true] - Forwarded to the request as a
     *   lowercase string.
     * @param {boolean} [params.autoConvertGoldensToTestCases=false]
     * @returns {Promise<void>}
     * @throws {Error} When `isConfident()` is false.
     */
    async pull(params) {
        const { alias, finalized = true, autoConvertGoldensToTestCases = false, } = params;
        if (!(0, utils_2.isConfident)()) {
            throw new Error("Set CONFIDENT_API_KEY to pull dataset.");
        }
        console.log(`Pulling '${alias}' from Confident AI...`);
        const api = new api_1.Api();
        const startTime = performance.now();
        const result = await api.sendRequest(api_1.HttpMethods.GET, api_1.Endpoints.DATASET_ENDPOINT, undefined, { alias, finalized: finalized.toString().toLowerCase() });
        const response = {
            goldens: result.goldens
                ? result.goldens.map((goldenData) => new golden_1.Golden({
                    input: goldenData.input,
                    actualOutput: goldenData.actualOutput,
                    expectedOutput: goldenData.expectedOutput,
                    context: goldenData.context,
                    retrievalContext: goldenData.retrievalContext,
                    toolsCalled: goldenData.toolsCalled,
                    expectedTools: goldenData.expectedTools,
                    additionalMetadata: goldenData.additionalMetadata,
                    sourceFile: goldenData.sourceFile,
                    comments: goldenData.comments,
                }))
                : undefined,
            conversationalGoldens: result.conversationalGoldens
                ? result.conversationalGoldens.map((goldenData) => new golden_1.ConversationalGolden({
                    scenario: goldenData.scenario,
                    expectedOutcome: goldenData.expectedOutcome,
                    userDescription: goldenData.userDescription,
                    context: goldenData.context,
                    additionalMetadata: goldenData.additionalMetadata,
                    comments: goldenData.comments,
                    name: goldenData.name,
                    customColumnKeyValues: goldenData.customColumnKeyValues,
                    turns: goldenData.turns,
                    _datasetRank: goldenData._datasetRank,
                    _datasetAlias: goldenData._datasetAlias,
                    _datasetId: goldenData._datasetId,
                }))
                : undefined,
            id: result.datasetId,
        };
        this._alias = alias;
        this._id = response.id;
        // JSON responses express "absent" as null as well as a missing key.
        this._multiTurn = result.goldens == null;
        this.goldens = [];
        this.testCases = [];
        if (autoConvertGoldensToTestCases) {
            if (!this._multiTurn) {
                const llmTestCases = (0, utils_1.convertGoldensToTestCases)(response.goldens ?? [], alias, response.id);
                this._llmTestCases.push(...llmTestCases);
            }
            else {
                const conversationalTestCases = (0, utils_1.convertConvoGoldensToConvoTestCases)(response.conversationalGoldens ?? [], alias, response.id);
                this._conversationalTestCases.push(...conversationalTestCases);
            }
        }
        else {
            if (!this._multiTurn) {
                this.goldens = response.goldens ?? [];
            }
            else {
                this.goldens = response.conversationalGoldens ?? [];
                for (const golden of this.goldens) {
                    golden._datasetAlias = alias;
                    golden._datasetId = response.id;
                }
            }
        }
        const endTime = performance.now();
        const timeTaken = ((endTime - startTime) / 1000).toFixed(2);
        console.log(`Done! (${timeTaken}s)`);
    }
    /**
     * Uploads the dataset's goldens under the given alias.
     *
     * @param {object} params
     * @param {string} params.alias - Alias to push under.
     * @param {boolean} [params.overwrite=false] - Forwarded in the request body.
     * @returns {Promise<void>}
     * @throws {Error} When the dataset has no goldens.
     */
    async push(params) {
        const { alias, overwrite = false } = params;
        if (this.goldens.length === 0) {
            throw new Error("Unable to push empty dataset to Confident AI, there must be at least one golden in dataset.");
        }
        const api = new api_1.Api();
        const apiDataset = {
            alias,
            overwrite,
            goldens: !this._multiTurn ? this.goldens : undefined,
            conversationalGoldens: this._multiTurn ? this.goldens : undefined,
        };
        // Round-trip through JSON so the helper receives plain data, not class instances.
        const body = (0, utils_1.stripPrivateFields)(JSON.parse(JSON.stringify(apiDataset)));
        console.log(`Pushing '${alias}' to Confident AI...`);
        const result = await api.sendRequest(api_1.HttpMethods.POST, api_1.Endpoints.DATASET_ENDPOINT, body);
        const link = result?.link;
        if (link) {
            console.log(`✅ Dataset successfully pushed to Confident AI! View at: ${link}`);
        }
    }
    ////////////////////////////////////////////////////////
    // Queue Methods
    ////////////////////////////////////////////////////////
    /**
     * Queues the given goldens onto an existing dataset alias. The type of the
     * first golden decides whether they are sent as conversational goldens.
     *
     * @param {object} params
     * @param {string} params.alias - Target dataset alias.
     * @param {Array} params.goldens - Goldens to queue; must be non-empty.
     * @param {boolean} [params.printResponse=true] - Whether to log the
     *   returned link.
     * @returns {Promise<void>}
     * @throws {Error} When `goldens` is missing or empty.
     */
    async queue(params) {
        const { alias, goldens, printResponse = true } = params;
        if (!goldens || goldens.length === 0) {
            throw new Error(`Can't queue empty list of goldens to dataset with alias: ${alias} on Confident AI.`);
        }
        const api = new api_1.Api();
        const isMultiTurn = goldens[0] instanceof golden_1.ConversationalGolden;
        const apiDataset = {
            alias,
            goldens: !isMultiTurn ? goldens : undefined,
            conversationalGoldens: isMultiTurn ? goldens : undefined,
        };
        const body = (0, utils_1.stripPrivateFields)(apiDataset);
        console.log(`Queueing ${goldens.length} golden(s) to '${alias}' on Confident AI...`);
        // NOTE(review): alias is interpolated into the path unescaped — confirm
        // whether sendRequest encodes it before adding encodeURIComponent here.
        const result = await api.sendRequest(api_1.HttpMethods.POST, api_1.Endpoints.DATASET_ENDPOINT, body, undefined, `/v1/datasets/${alias}/queue`);
        const link = result?.link;
        if (link && printResponse) {
            console.log(`✅ Goldens successfully queued to Confident AI! Annotate & finalize at: ${link}`);
        }
    }
    /**
     * Reads a CSV file (with a header row) and builds one LLMTestCase per row.
     *
     * NOTE(review): despite the name, the test cases are only returned; they
     * are not added to this dataset. Confirm whether callers are expected to
     * assign the result to `testCases` themselves.
     *
     * @param {object} params
     * @param {string} params.filePath - Path of the CSV file, read as UTF-8.
     * @param {string} params.inputCol - Column holding the input.
     * @param {string} params.actualOutputCol - Column holding the actual output.
     * @param {string} [params.expectedOutputCol]
     * @param {string} [params.contextCol]
     * @param {string} [params.contextDelimiter=";"]
     * @param {string} [params.retrievalContextCol]
     * @param {string} [params.retrievalContextDelimiter=";"]
     * @param {string} [params.toolsCalledCol] - Column parsed as JSON (default `[]`).
     * @param {string} [params.expectedToolsCol] - Column parsed as JSON (default `[]`).
     * @param {string} [params.additionalMetadataCol] - Column parsed as JSON.
     * @returns {Promise<Array>} The LLMTestCase instances built from the rows.
     * @throws {Error} When the CSV parser reports any error.
     */
    async addTestCasesFromCSV({ filePath, inputCol, actualOutputCol, expectedOutputCol, contextCol, contextDelimiter = ";", retrievalContextCol, retrievalContextDelimiter = ";", toolsCalledCol, expectedToolsCol, additionalMetadataCol, }) {
        const csvData = node_fs_1.default.readFileSync(filePath, "utf8");
        const { data, errors } = papaparse_1.default.parse(csvData, {
            header: true,
            skipEmptyLines: true,
        });
        if (errors.length) {
            throw new Error(`CSV parse error: ${errors[0].message}`);
        }
        return data.map((row) => new test_case_1.LLMTestCase({
            input: row[inputCol],
            actualOutput: row[actualOutputCol],
            expectedOutput: expectedOutputCol
                ? row[expectedOutputCol]
                : undefined,
            context: (0, utils_1.parseDelimited)(row[contextCol], contextDelimiter),
            retrievalContext: (0, utils_1.parseDelimited)(row[retrievalContextCol], retrievalContextDelimiter),
            toolsCalled: (0, utils_1.safeJsonParse)(row[toolsCalledCol], []),
            expectedTools: (0, utils_1.safeJsonParse)(row[expectedToolsCol], []),
            additionalMetadata: (0, utils_1.safeJsonParse)(row[additionalMetadataCol], undefined),
        }));
    }
}
289
+ exports.EvaluationDataset = EvaluationDataset;