@launchdarkly/server-sdk-ai 0.14.1 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/dist/index.cjs +1117 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +1022 -0
- package/dist/index.d.ts +1022 -0
- package/dist/index.js +1071 -0
- package/dist/index.js.map +1 -0
- package/package.json +22 -5
- package/__tests__/Judge.test.ts +0 -521
- package/__tests__/LDAIClientImpl.test.ts +0 -594
- package/__tests__/LDAIConfigTrackerImpl.test.ts +0 -815
- package/__tests__/TokenUsage.test.ts +0 -119
- package/__tests__/TrackedChat.test.ts +0 -231
- package/dist/package.json +0 -53
- package/dist/src/LDAIClientImpl.d.ts +0 -39
- package/dist/src/LDAIClientImpl.d.ts.map +0 -1
- package/dist/src/LDAIClientImpl.js +0 -164
- package/dist/src/LDAIClientImpl.js.map +0 -1
- package/dist/src/LDAIConfigTrackerImpl.d.ts +0 -74
- package/dist/src/LDAIConfigTrackerImpl.d.ts.map +0 -1
- package/dist/src/LDAIConfigTrackerImpl.js +0 -207
- package/dist/src/LDAIConfigTrackerImpl.js.map +0 -1
- package/dist/src/LDClientMin.d.ts +0 -11
- package/dist/src/LDClientMin.d.ts.map +0 -1
- package/dist/src/LDClientMin.js +0 -3
- package/dist/src/LDClientMin.js.map +0 -1
- package/dist/src/api/LDAIClient.d.ts +0 -258
- package/dist/src/api/LDAIClient.d.ts.map +0 -1
- package/dist/src/api/LDAIClient.js +0 -3
- package/dist/src/api/LDAIClient.js.map +0 -1
- package/dist/src/api/chat/TrackedChat.d.ts +0 -72
- package/dist/src/api/chat/TrackedChat.d.ts.map +0 -1
- package/dist/src/api/chat/TrackedChat.js +0 -125
- package/dist/src/api/chat/TrackedChat.js.map +0 -1
- package/dist/src/api/chat/index.d.ts +0 -3
- package/dist/src/api/chat/index.d.ts.map +0 -1
- package/dist/src/api/chat/index.js +0 -19
- package/dist/src/api/chat/index.js.map +0 -1
- package/dist/src/api/chat/types.d.ts +0 -22
- package/dist/src/api/chat/types.d.ts.map +0 -1
- package/dist/src/api/chat/types.js +0 -3
- package/dist/src/api/chat/types.js.map +0 -1
- package/dist/src/api/config/LDAIConfigTracker.d.ts +0 -209
- package/dist/src/api/config/LDAIConfigTracker.d.ts.map +0 -1
- package/dist/src/api/config/LDAIConfigTracker.js +0 -3
- package/dist/src/api/config/LDAIConfigTracker.js.map +0 -1
- package/dist/src/api/config/LDAIConfigUtils.d.ts +0 -2
- package/dist/src/api/config/LDAIConfigUtils.d.ts.map +0 -1
- package/dist/src/api/config/LDAIConfigUtils.js +0 -145
- package/dist/src/api/config/LDAIConfigUtils.js.map +0 -1
- package/dist/src/api/config/index.d.ts +0 -3
- package/dist/src/api/config/index.d.ts.map +0 -1
- package/dist/src/api/config/index.js +0 -18
- package/dist/src/api/config/index.js.map +0 -1
- package/dist/src/api/config/types.d.ts +0 -206
- package/dist/src/api/config/types.d.ts.map +0 -1
- package/dist/src/api/config/types.js +0 -3
- package/dist/src/api/config/types.js.map +0 -1
- package/dist/src/api/index.d.ts +0 -7
- package/dist/src/api/index.d.ts.map +0 -1
- package/dist/src/api/index.js +0 -23
- package/dist/src/api/index.js.map +0 -1
- package/dist/src/api/judge/EvaluationSchemaBuilder.d.ts +0 -11
- package/dist/src/api/judge/EvaluationSchemaBuilder.d.ts.map +0 -1
- package/dist/src/api/judge/EvaluationSchemaBuilder.js +0 -52
- package/dist/src/api/judge/EvaluationSchemaBuilder.js.map +0 -1
- package/dist/src/api/judge/Judge.d.ts +0 -63
- package/dist/src/api/judge/Judge.d.ts.map +0 -1
- package/dist/src/api/judge/Judge.js +0 -151
- package/dist/src/api/judge/Judge.js.map +0 -1
- package/dist/src/api/judge/index.d.ts +0 -3
- package/dist/src/api/judge/index.d.ts.map +0 -1
- package/dist/src/api/judge/index.js +0 -6
- package/dist/src/api/judge/index.js.map +0 -1
- package/dist/src/api/judge/types.d.ts +0 -37
- package/dist/src/api/judge/types.d.ts.map +0 -1
- package/dist/src/api/judge/types.js +0 -3
- package/dist/src/api/judge/types.js.map +0 -1
- package/dist/src/api/metrics/BedrockTokenUsage.d.ts +0 -7
- package/dist/src/api/metrics/BedrockTokenUsage.d.ts.map +0 -1
- package/dist/src/api/metrics/BedrockTokenUsage.js +0 -12
- package/dist/src/api/metrics/BedrockTokenUsage.js.map +0 -1
- package/dist/src/api/metrics/LDAIMetrics.d.ts +0 -17
- package/dist/src/api/metrics/LDAIMetrics.d.ts.map +0 -1
- package/dist/src/api/metrics/LDAIMetrics.js +0 -3
- package/dist/src/api/metrics/LDAIMetrics.js.map +0 -1
- package/dist/src/api/metrics/LDFeedbackKind.d.ts +0 -14
- package/dist/src/api/metrics/LDFeedbackKind.d.ts.map +0 -1
- package/dist/src/api/metrics/LDFeedbackKind.js +0 -18
- package/dist/src/api/metrics/LDFeedbackKind.js.map +0 -1
- package/dist/src/api/metrics/LDTokenUsage.d.ts +0 -18
- package/dist/src/api/metrics/LDTokenUsage.d.ts.map +0 -1
- package/dist/src/api/metrics/LDTokenUsage.js +0 -3
- package/dist/src/api/metrics/LDTokenUsage.js.map +0 -1
- package/dist/src/api/metrics/OpenAiUsage.d.ts +0 -7
- package/dist/src/api/metrics/OpenAiUsage.d.ts.map +0 -1
- package/dist/src/api/metrics/OpenAiUsage.js +0 -13
- package/dist/src/api/metrics/OpenAiUsage.js.map +0 -1
- package/dist/src/api/metrics/VercelAISDKTokenUsage.d.ts +0 -9
- package/dist/src/api/metrics/VercelAISDKTokenUsage.d.ts.map +0 -1
- package/dist/src/api/metrics/VercelAISDKTokenUsage.js +0 -13
- package/dist/src/api/metrics/VercelAISDKTokenUsage.js.map +0 -1
- package/dist/src/api/metrics/index.d.ts +0 -7
- package/dist/src/api/metrics/index.d.ts.map +0 -1
- package/dist/src/api/metrics/index.js +0 -23
- package/dist/src/api/metrics/index.js.map +0 -1
- package/dist/src/api/providers/AIProvider.d.ts +0 -52
- package/dist/src/api/providers/AIProvider.d.ts.map +0 -1
- package/dist/src/api/providers/AIProvider.js +0 -88
- package/dist/src/api/providers/AIProvider.js.map +0 -1
- package/dist/src/api/providers/AIProviderFactory.d.ts +0 -39
- package/dist/src/api/providers/AIProviderFactory.d.ts.map +0 -1
- package/dist/src/api/providers/AIProviderFactory.js +0 -102
- package/dist/src/api/providers/AIProviderFactory.js.map +0 -1
- package/dist/src/api/providers/index.d.ts +0 -3
- package/dist/src/api/providers/index.d.ts.map +0 -1
- package/dist/src/api/providers/index.js +0 -19
- package/dist/src/api/providers/index.js.map +0 -1
- package/dist/src/index.d.ts +0 -19
- package/dist/src/index.d.ts.map +0 -1
- package/dist/src/index.js +0 -29
- package/dist/src/index.js.map +0 -1
- package/docs/.nojekyll +0 -1
- package/docs/assets/highlight.css +0 -92
- package/docs/assets/main.js +0 -58
- package/docs/assets/search.js +0 -1
- package/docs/assets/style.css +0 -1379
- package/docs/classes/AIProvider.html +0 -210
- package/docs/classes/AIProviderFactory.html +0 -208
- package/docs/classes/Judge.html +0 -322
- package/docs/classes/TrackedChat.html +0 -322
- package/docs/enums/LDFeedbackKind.html +0 -115
- package/docs/functions/createBedrockTokenUsage.html +0 -94
- package/docs/functions/createOpenAiUsage.html +0 -94
- package/docs/functions/createVercelAISDKTokenUsage.html +0 -98
- package/docs/functions/initAi.html +0 -93
- package/docs/index.html +0 -136
- package/docs/interfaces/ChatResponse.html +0 -130
- package/docs/interfaces/EvalScore.html +0 -119
- package/docs/interfaces/JudgeResponse.html +0 -139
- package/docs/interfaces/LDAIAgentConfig.html +0 -178
- package/docs/interfaces/LDAIAgentConfigDefault.html +0 -155
- package/docs/interfaces/LDAIAgentRequestConfig.html +0 -129
- package/docs/interfaces/LDAIClient.html +0 -449
- package/docs/interfaces/LDAICompletionConfig.html +0 -178
- package/docs/interfaces/LDAICompletionConfigDefault.html +0 -155
- package/docs/interfaces/LDAIConfig.html +0 -158
- package/docs/interfaces/LDAIConfigDefault.html +0 -133
- package/docs/interfaces/LDAIConfigTracker.html +0 -530
- package/docs/interfaces/LDAIJudgeConfig.html +0 -178
- package/docs/interfaces/LDAIJudgeConfigDefault.html +0 -155
- package/docs/interfaces/LDAIMetrics.html +0 -121
- package/docs/interfaces/LDJudge.html +0 -119
- package/docs/interfaces/LDJudgeConfiguration.html +0 -109
- package/docs/interfaces/LDLogger.html +0 -189
- package/docs/interfaces/LDMessage.html +0 -119
- package/docs/interfaces/LDModelConfig.html +0 -139
- package/docs/interfaces/LDProviderConfig.html +0 -105
- package/docs/interfaces/LDTokenUsage.html +0 -129
- package/docs/interfaces/StructuredResponse.html +0 -129
- package/docs/types/LDAIConfigDefaultKind.html +0 -81
- package/docs/types/LDAIConfigKind.html +0 -81
- package/docs/types/LDAIConfigMode.html +0 -81
- package/docs/types/SupportedAIProvider.html +0 -81
- package/docs/variables/SUPPORTED_AI_PROVIDERS.html +0 -81
- package/jest.config.js +0 -7
- package/src/LDAIClientImpl.ts +0 -327
- package/src/LDAIConfigTrackerImpl.ts +0 -288
- package/src/LDClientMin.ts +0 -18
- package/src/api/LDAIClient.ts +0 -325
- package/src/api/chat/TrackedChat.ts +0 -163
- package/src/api/chat/index.ts +0 -2
- package/src/api/chat/types.ts +0 -24
- package/src/api/config/LDAIConfigTracker.ts +0 -238
- package/src/api/config/LDAIConfigUtils.ts +0 -212
- package/src/api/config/index.ts +0 -3
- package/src/api/config/types.ts +0 -260
- package/src/api/index.ts +0 -6
- package/src/api/judge/EvaluationSchemaBuilder.ts +0 -54
- package/src/api/judge/Judge.ts +0 -218
- package/src/api/judge/index.ts +0 -2
- package/src/api/judge/types.ts +0 -41
- package/src/api/metrics/BedrockTokenUsage.ts +0 -13
- package/src/api/metrics/LDAIMetrics.ts +0 -18
- package/src/api/metrics/LDFeedbackKind.ts +0 -13
- package/src/api/metrics/LDTokenUsage.ts +0 -19
- package/src/api/metrics/OpenAiUsage.ts +0 -13
- package/src/api/metrics/VercelAISDKTokenUsage.ts +0 -15
- package/src/api/metrics/index.ts +0 -6
- package/src/api/providers/AIProvider.ts +0 -94
- package/src/api/providers/AIProviderFactory.ts +0 -152
- package/src/api/providers/index.ts +0 -2
- package/src/index.ts +0 -24
- package/tsconfig.eslint.json +0 -5
- package/tsconfig.json +0 -21
- package/tsconfig.ref.json +0 -7
- package/typedoc.json +0 -5
package/src/api/config/types.ts
DELETED
|
@@ -1,260 +0,0 @@
|
|
|
1
|
-
import { LDAIConfigTracker } from './LDAIConfigTracker';
|
|
2
|
-
|
|
3
|
-
// ============================================================================
|
|
4
|
-
// Foundation Types
|
|
5
|
-
// ============================================================================
|
|
6
|
-
|
|
7
|
-
/**
|
|
8
|
-
* Information about prompts.
|
|
9
|
-
*/
|
|
10
|
-
export interface LDMessage {
|
|
11
|
-
/**
|
|
12
|
-
* The role of the prompt.
|
|
13
|
-
*/
|
|
14
|
-
role: 'user' | 'assistant' | 'system';
|
|
15
|
-
/**
|
|
16
|
-
* Content for the prompt.
|
|
17
|
-
*/
|
|
18
|
-
content: string;
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
/**
|
|
22
|
-
* Configuration related to the model.
|
|
23
|
-
*/
|
|
24
|
-
export interface LDModelConfig {
|
|
25
|
-
/**
|
|
26
|
-
* The ID of the model.
|
|
27
|
-
*/
|
|
28
|
-
name: string;
|
|
29
|
-
|
|
30
|
-
/**
|
|
31
|
-
* Model specific parameters.
|
|
32
|
-
*/
|
|
33
|
-
parameters?: { [index: string]: unknown };
|
|
34
|
-
|
|
35
|
-
/**
|
|
36
|
-
* Additional user-specified parameters.
|
|
37
|
-
*/
|
|
38
|
-
custom?: { [index: string]: unknown };
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
export interface LDProviderConfig {
|
|
42
|
-
/**
|
|
43
|
-
* The name of the provider.
|
|
44
|
-
*/
|
|
45
|
-
name: string;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
// ============================================================================
|
|
49
|
-
// Judge Types
|
|
50
|
-
// ============================================================================
|
|
51
|
-
|
|
52
|
-
/**
|
|
53
|
-
* Configuration for a single judge attachment.
|
|
54
|
-
*/
|
|
55
|
-
export interface LDJudge {
|
|
56
|
-
/** The key of the judge AI Config */
|
|
57
|
-
key: string;
|
|
58
|
-
/** Sampling rate for evaluation (0.0 to 1.0) */
|
|
59
|
-
samplingRate: number;
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
/**
|
|
63
|
-
* Configuration for judge attachment to AI Configs.
|
|
64
|
-
*/
|
|
65
|
-
export interface LDJudgeConfiguration {
|
|
66
|
-
/** Array of judge configurations */
|
|
67
|
-
judges: LDJudge[];
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
// ============================================================================
|
|
71
|
-
// Base AI Config Types
|
|
72
|
-
// ============================================================================
|
|
73
|
-
|
|
74
|
-
/**
|
|
75
|
-
* Base AI Config interface for default implementations with optional enabled property.
|
|
76
|
-
*/
|
|
77
|
-
export interface LDAIConfigDefault {
|
|
78
|
-
/**
|
|
79
|
-
* Optional model configuration.
|
|
80
|
-
*/
|
|
81
|
-
model?: LDModelConfig;
|
|
82
|
-
|
|
83
|
-
/**
|
|
84
|
-
* Optional configuration for the provider.
|
|
85
|
-
*/
|
|
86
|
-
provider?: LDProviderConfig;
|
|
87
|
-
|
|
88
|
-
/**
|
|
89
|
-
* Whether the configuration is enabled. Defaults to false when not provided.
|
|
90
|
-
*/
|
|
91
|
-
enabled?: boolean;
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
/**
|
|
95
|
-
* Base AI Config interface without mode-specific fields.
|
|
96
|
-
*/
|
|
97
|
-
export interface LDAIConfig extends Omit<LDAIConfigDefault, 'enabled'> {
|
|
98
|
-
/**
|
|
99
|
-
* The key of the AI Config.
|
|
100
|
-
*/
|
|
101
|
-
key: string;
|
|
102
|
-
/**
|
|
103
|
-
* Whether the configuration is enabled.
|
|
104
|
-
*/
|
|
105
|
-
enabled: boolean;
|
|
106
|
-
|
|
107
|
-
/**
|
|
108
|
-
* A tracker which can be used to generate analytics.
|
|
109
|
-
* Undefined for disabled configs.
|
|
110
|
-
*/
|
|
111
|
-
tracker?: LDAIConfigTracker;
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
// ============================================================================
|
|
115
|
-
// Default AI Config Implementation Types
|
|
116
|
-
// ============================================================================
|
|
117
|
-
|
|
118
|
-
/**
|
|
119
|
-
* Default Agent-specific AI Config with instructions.
|
|
120
|
-
*/
|
|
121
|
-
export interface LDAIAgentConfigDefault extends LDAIConfigDefault {
|
|
122
|
-
/**
|
|
123
|
-
* Instructions for the agent.
|
|
124
|
-
*/
|
|
125
|
-
instructions?: string;
|
|
126
|
-
/**
|
|
127
|
-
* Judge configuration for AI Configs being evaluated.
|
|
128
|
-
* References judge AI Configs that should evaluate this AI Config.
|
|
129
|
-
*/
|
|
130
|
-
judgeConfiguration?: LDJudgeConfiguration;
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
/**
|
|
134
|
-
* Default Completion AI Config (default mode).
|
|
135
|
-
*/
|
|
136
|
-
export interface LDAICompletionConfigDefault extends LDAIConfigDefault {
|
|
137
|
-
/**
|
|
138
|
-
* Optional prompt data for completion configurations.
|
|
139
|
-
*/
|
|
140
|
-
messages?: LDMessage[];
|
|
141
|
-
/**
|
|
142
|
-
* Judge configuration for AI Configs being evaluated.
|
|
143
|
-
* References judge AI Configs that should evaluate this AI Config.
|
|
144
|
-
*/
|
|
145
|
-
judgeConfiguration?: LDJudgeConfiguration;
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
/**
|
|
149
|
-
* Default Judge-specific AI Config with required evaluation metric key.
|
|
150
|
-
*/
|
|
151
|
-
export interface LDAIJudgeConfigDefault extends LDAIConfigDefault {
|
|
152
|
-
/**
|
|
153
|
-
* Optional prompt data for judge configurations.
|
|
154
|
-
*/
|
|
155
|
-
messages?: LDMessage[];
|
|
156
|
-
/**
|
|
157
|
-
* Evaluation metric keys for judge configurations.
|
|
158
|
-
* The keys of the metrics that this judge can evaluate.
|
|
159
|
-
*/
|
|
160
|
-
evaluationMetricKeys?: string[];
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
/**
|
|
164
|
-
* Union type for all default AI Config variants.
|
|
165
|
-
*/
|
|
166
|
-
export type LDAIConfigDefaultKind =
|
|
167
|
-
| LDAIAgentConfigDefault
|
|
168
|
-
| LDAICompletionConfigDefault
|
|
169
|
-
| LDAIJudgeConfigDefault;
|
|
170
|
-
|
|
171
|
-
// ============================================================================
|
|
172
|
-
// AI Config Implementation Types
|
|
173
|
-
// ============================================================================
|
|
174
|
-
|
|
175
|
-
/**
|
|
176
|
-
* Agent-specific AI Config with instructions.
|
|
177
|
-
*/
|
|
178
|
-
export interface LDAIAgentConfig extends LDAIConfig {
|
|
179
|
-
/**
|
|
180
|
-
* Instructions for the agent.
|
|
181
|
-
*/
|
|
182
|
-
instructions?: string;
|
|
183
|
-
/**
|
|
184
|
-
* Judge configuration for AI Configs being evaluated.
|
|
185
|
-
* References judge AI Configs that should evaluate this AI Config.
|
|
186
|
-
*/
|
|
187
|
-
judgeConfiguration?: LDJudgeConfiguration;
|
|
188
|
-
}
|
|
189
|
-
|
|
190
|
-
/**
|
|
191
|
-
* Completion AI Config (default mode).
|
|
192
|
-
*/
|
|
193
|
-
export interface LDAICompletionConfig extends LDAIConfig {
|
|
194
|
-
/**
|
|
195
|
-
* Optional prompt data for completion configurations.
|
|
196
|
-
*/
|
|
197
|
-
messages?: LDMessage[];
|
|
198
|
-
/**
|
|
199
|
-
* Judge configuration for AI Configs being evaluated.
|
|
200
|
-
* References judge AI Configs that should evaluate this AI Config.
|
|
201
|
-
*/
|
|
202
|
-
judgeConfiguration?: LDJudgeConfiguration;
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
/**
|
|
206
|
-
* Judge-specific AI Config with required evaluation metric key.
|
|
207
|
-
*/
|
|
208
|
-
export interface LDAIJudgeConfig extends LDAIConfig {
|
|
209
|
-
/**
|
|
210
|
-
* Optional prompt data for judge configurations.
|
|
211
|
-
*/
|
|
212
|
-
messages?: LDMessage[];
|
|
213
|
-
/**
|
|
214
|
-
* Evaluation metric keys for judge configurations.
|
|
215
|
-
* The keys of the metrics that this judge can evaluate.
|
|
216
|
-
*/
|
|
217
|
-
evaluationMetricKeys: string[];
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
// ============================================================================
|
|
221
|
-
// Union Types
|
|
222
|
-
// ============================================================================
|
|
223
|
-
|
|
224
|
-
/**
|
|
225
|
-
* Union type for all AI Config variants.
|
|
226
|
-
*/
|
|
227
|
-
export type LDAIConfigKind = LDAIAgentConfig | LDAICompletionConfig | LDAIJudgeConfig;
|
|
228
|
-
|
|
229
|
-
// ============================================================================
|
|
230
|
-
// Agent-Specific Request Type
|
|
231
|
-
// ============================================================================
|
|
232
|
-
|
|
233
|
-
/**
|
|
234
|
-
* Configuration for a single agent request.
|
|
235
|
-
*/
|
|
236
|
-
export interface LDAIAgentRequestConfig {
|
|
237
|
-
/**
|
|
238
|
-
* The agent key to retrieve.
|
|
239
|
-
*/
|
|
240
|
-
key: string;
|
|
241
|
-
|
|
242
|
-
/**
|
|
243
|
-
* Default configuration for the agent.
|
|
244
|
-
*/
|
|
245
|
-
defaultValue: LDAIAgentConfigDefault;
|
|
246
|
-
|
|
247
|
-
/**
|
|
248
|
-
* Variables for instructions interpolation.
|
|
249
|
-
*/
|
|
250
|
-
variables?: Record<string, unknown>;
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
// ============================================================================
|
|
254
|
-
// Mode Type
|
|
255
|
-
// ============================================================================
|
|
256
|
-
|
|
257
|
-
/**
|
|
258
|
-
* Mode type for AI configurations.
|
|
259
|
-
*/
|
|
260
|
-
export type LDAIConfigMode = 'completion' | 'agent' | 'judge';
|
package/src/api/index.ts
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Internal class for building dynamic evaluation response schemas.
|
|
3
|
-
* Not exported - only used internally by TrackedJudge.
|
|
4
|
-
*/
|
|
5
|
-
class EvaluationSchemaBuilder {
|
|
6
|
-
static build(evaluationMetricKeys: string[]): Record<string, unknown> {
|
|
7
|
-
return {
|
|
8
|
-
type: 'object',
|
|
9
|
-
properties: {
|
|
10
|
-
evaluations: {
|
|
11
|
-
type: 'object',
|
|
12
|
-
description: `Object containing evaluation results for ${evaluationMetricKeys.join(', ')} metrics`,
|
|
13
|
-
properties: this._buildKeyProperties(evaluationMetricKeys),
|
|
14
|
-
required: evaluationMetricKeys,
|
|
15
|
-
additionalProperties: false,
|
|
16
|
-
},
|
|
17
|
-
},
|
|
18
|
-
required: ['evaluations'],
|
|
19
|
-
additionalProperties: false,
|
|
20
|
-
} as const;
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
private static _buildKeyProperties(evaluationMetricKeys: string[]) {
|
|
24
|
-
return evaluationMetricKeys.reduce(
|
|
25
|
-
(acc, key) => {
|
|
26
|
-
acc[key] = this._buildKeySchema(key);
|
|
27
|
-
return acc;
|
|
28
|
-
},
|
|
29
|
-
{} as Record<string, unknown>,
|
|
30
|
-
);
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
private static _buildKeySchema(key: string) {
|
|
34
|
-
return {
|
|
35
|
-
type: 'object',
|
|
36
|
-
properties: {
|
|
37
|
-
score: {
|
|
38
|
-
type: 'number',
|
|
39
|
-
minimum: 0,
|
|
40
|
-
maximum: 1,
|
|
41
|
-
description: `Score between 0.0 and 1.0 for ${key}`,
|
|
42
|
-
},
|
|
43
|
-
reasoning: {
|
|
44
|
-
type: 'string',
|
|
45
|
-
description: `Reasoning behind the score for ${key}`,
|
|
46
|
-
},
|
|
47
|
-
},
|
|
48
|
-
required: ['score', 'reasoning'],
|
|
49
|
-
additionalProperties: false,
|
|
50
|
-
};
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
export { EvaluationSchemaBuilder };
|
package/src/api/judge/Judge.ts
DELETED
|
@@ -1,218 +0,0 @@
|
|
|
1
|
-
import * as Mustache from 'mustache';
|
|
2
|
-
|
|
3
|
-
import { LDLogger } from '@launchdarkly/js-server-sdk-common';
|
|
4
|
-
|
|
5
|
-
import { ChatResponse } from '../chat/types';
|
|
6
|
-
import { LDAIConfigTracker } from '../config/LDAIConfigTracker';
|
|
7
|
-
import { LDAIJudgeConfig, LDMessage } from '../config/types';
|
|
8
|
-
import { AIProvider } from '../providers/AIProvider';
|
|
9
|
-
import { EvaluationSchemaBuilder } from './EvaluationSchemaBuilder';
|
|
10
|
-
import { EvalScore, JudgeResponse, StructuredResponse } from './types';
|
|
11
|
-
|
|
12
|
-
/**
|
|
13
|
-
* Judge implementation that handles evaluation functionality and conversation management.
|
|
14
|
-
*
|
|
15
|
-
* According to the AIEval spec, judges are AI Configs with mode: "judge" that evaluate
|
|
16
|
-
* other AI Configs using structured output.
|
|
17
|
-
*/
|
|
18
|
-
export class Judge {
|
|
19
|
-
private readonly _logger?: LDLogger;
|
|
20
|
-
private readonly _evaluationResponseStructure: Record<string, unknown>;
|
|
21
|
-
|
|
22
|
-
constructor(
|
|
23
|
-
private readonly _aiConfig: LDAIJudgeConfig,
|
|
24
|
-
private readonly _aiConfigTracker: LDAIConfigTracker,
|
|
25
|
-
private readonly _aiProvider: AIProvider,
|
|
26
|
-
logger?: LDLogger,
|
|
27
|
-
) {
|
|
28
|
-
this._logger = logger;
|
|
29
|
-
this._evaluationResponseStructure = EvaluationSchemaBuilder.build(
|
|
30
|
-
this._aiConfig.evaluationMetricKeys,
|
|
31
|
-
);
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
/**
|
|
35
|
-
* Evaluates an AI response using the judge's configuration.
|
|
36
|
-
*
|
|
37
|
-
* @param input The input prompt or question that was provided to the AI
|
|
38
|
-
* @param output The AI-generated response to be evaluated
|
|
39
|
-
* @param samplingRate Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1)
|
|
40
|
-
* @returns Promise that resolves to evaluation results or undefined if not sampled
|
|
41
|
-
*/
|
|
42
|
-
async evaluate(
|
|
43
|
-
input: string,
|
|
44
|
-
output: string,
|
|
45
|
-
samplingRate: number = 1,
|
|
46
|
-
): Promise<JudgeResponse | undefined> {
|
|
47
|
-
try {
|
|
48
|
-
if (
|
|
49
|
-
!this._aiConfig.evaluationMetricKeys ||
|
|
50
|
-
this._aiConfig.evaluationMetricKeys.length === 0
|
|
51
|
-
) {
|
|
52
|
-
this._logger?.warn(
|
|
53
|
-
'Judge configuration is missing required evaluationMetricKeys',
|
|
54
|
-
this._aiConfigTracker.getTrackData(),
|
|
55
|
-
);
|
|
56
|
-
return undefined;
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
if (!this._aiConfig.messages) {
|
|
60
|
-
this._logger?.warn(
|
|
61
|
-
'Judge configuration must include messages',
|
|
62
|
-
this._aiConfigTracker.getTrackData(),
|
|
63
|
-
);
|
|
64
|
-
return undefined;
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
if (Math.random() > samplingRate) {
|
|
68
|
-
this._logger?.debug(`Judge evaluation skipped due to sampling rate: ${samplingRate}`);
|
|
69
|
-
return undefined;
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
const messages = this._constructEvaluationMessages(input, output);
|
|
73
|
-
|
|
74
|
-
const response = await this._aiConfigTracker.trackMetricsOf(
|
|
75
|
-
(result: StructuredResponse) => result.metrics,
|
|
76
|
-
() => this._aiProvider.invokeStructuredModel(messages, this._evaluationResponseStructure),
|
|
77
|
-
);
|
|
78
|
-
|
|
79
|
-
let { success } = response.metrics;
|
|
80
|
-
|
|
81
|
-
const evals = this._parseEvaluationResponse(response.data);
|
|
82
|
-
|
|
83
|
-
if (Object.keys(evals).length !== this._aiConfig.evaluationMetricKeys.length) {
|
|
84
|
-
this._logger?.warn(
|
|
85
|
-
'Judge evaluation did not return all evaluations',
|
|
86
|
-
this._aiConfigTracker.getTrackData(),
|
|
87
|
-
);
|
|
88
|
-
success = false;
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
return {
|
|
92
|
-
evals,
|
|
93
|
-
success,
|
|
94
|
-
judgeConfigKey: this._aiConfig.key,
|
|
95
|
-
};
|
|
96
|
-
} catch (error) {
|
|
97
|
-
this._logger?.error('Judge evaluation failed:', error);
|
|
98
|
-
return {
|
|
99
|
-
evals: {},
|
|
100
|
-
success: false,
|
|
101
|
-
error: error instanceof Error ? error.message : 'Unknown error',
|
|
102
|
-
judgeConfigKey: this._aiConfig.key,
|
|
103
|
-
};
|
|
104
|
-
}
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
/**
|
|
108
|
-
* Evaluates an AI response from chat messages and response.
|
|
109
|
-
*
|
|
110
|
-
* @param messages Array of messages representing the conversation history
|
|
111
|
-
* @param response The AI response to be evaluated
|
|
112
|
-
* @param samplingRatio Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1)
|
|
113
|
-
* @returns Promise that resolves to evaluation results or undefined if not sampled
|
|
114
|
-
*/
|
|
115
|
-
async evaluateMessages(
|
|
116
|
-
messages: LDMessage[],
|
|
117
|
-
response: ChatResponse,
|
|
118
|
-
samplingRatio: number = 1,
|
|
119
|
-
): Promise<JudgeResponse | undefined> {
|
|
120
|
-
const input = messages.length === 0 ? '' : messages.map((msg) => msg.content).join('\r\n');
|
|
121
|
-
const output = response.message.content;
|
|
122
|
-
|
|
123
|
-
return this.evaluate(input, output, samplingRatio);
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
/**
|
|
127
|
-
* Returns the AI Config used by this judge.
|
|
128
|
-
*/
|
|
129
|
-
getAIConfig(): LDAIJudgeConfig {
|
|
130
|
-
return this._aiConfig;
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
/**
|
|
134
|
-
* Returns the tracker associated with this judge.
|
|
135
|
-
*/
|
|
136
|
-
getTracker(): LDAIConfigTracker {
|
|
137
|
-
return this._aiConfigTracker;
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
/**
|
|
141
|
-
* Returns the AI provider used by this judge.
|
|
142
|
-
*/
|
|
143
|
-
getProvider(): AIProvider {
|
|
144
|
-
return this._aiProvider;
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
/**
|
|
148
|
-
* Constructs evaluation messages by combining judge's config messages with input/output.
|
|
149
|
-
*/
|
|
150
|
-
private _constructEvaluationMessages(input: string, output: string): LDMessage[] {
|
|
151
|
-
const messages: LDMessage[] = this._aiConfig.messages!.map((msg) => ({
|
|
152
|
-
...msg,
|
|
153
|
-
content: this._interpolateMessage(msg.content, {
|
|
154
|
-
message_history: input,
|
|
155
|
-
response_to_evaluate: output,
|
|
156
|
-
}),
|
|
157
|
-
}));
|
|
158
|
-
|
|
159
|
-
return messages;
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
/**
|
|
163
|
-
* Interpolates message content with variables using Mustache templating.
|
|
164
|
-
*/
|
|
165
|
-
private _interpolateMessage(content: string, variables: Record<string, string>): string {
|
|
166
|
-
return Mustache.render(content, variables, undefined, { escape: (item: any) => item });
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
/**
|
|
170
|
-
* Parses the structured evaluation response from the AI provider.
|
|
171
|
-
*/
|
|
172
|
-
private _parseEvaluationResponse(data: Record<string, unknown>): Record<string, EvalScore> {
|
|
173
|
-
const evaluations = data.evaluations as Record<string, unknown>;
|
|
174
|
-
const results: Record<string, EvalScore> = {};
|
|
175
|
-
|
|
176
|
-
if (!data.evaluations || typeof data.evaluations !== 'object') {
|
|
177
|
-
this._logger?.warn('Invalid response: missing or invalid evaluations object');
|
|
178
|
-
return results;
|
|
179
|
-
}
|
|
180
|
-
|
|
181
|
-
this._aiConfig.evaluationMetricKeys.forEach((metricKey) => {
|
|
182
|
-
const evaluation = evaluations[metricKey];
|
|
183
|
-
|
|
184
|
-
if (!evaluation || typeof evaluation !== 'object') {
|
|
185
|
-
this._logger?.warn(
|
|
186
|
-
`Missing evaluation for metric key: ${metricKey}`,
|
|
187
|
-
this._aiConfigTracker.getTrackData(),
|
|
188
|
-
);
|
|
189
|
-
return;
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
const evalData = evaluation as Record<string, unknown>;
|
|
193
|
-
|
|
194
|
-
if (typeof evalData.score !== 'number' || evalData.score < 0 || evalData.score > 1) {
|
|
195
|
-
this._logger?.warn(
|
|
196
|
-
`Invalid score evaluated for ${metricKey}: ${evalData.score}. Score must be a number between 0 and 1 inclusive`,
|
|
197
|
-
this._aiConfigTracker.getTrackData(),
|
|
198
|
-
);
|
|
199
|
-
return;
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
if (typeof evalData.reasoning !== 'string') {
|
|
203
|
-
this._logger?.warn(
|
|
204
|
-
`Invalid reasoning evaluated for ${metricKey}: ${evalData.reasoning}. Reasoning must be a string`,
|
|
205
|
-
this._aiConfigTracker.getTrackData(),
|
|
206
|
-
);
|
|
207
|
-
return;
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
results[metricKey] = {
|
|
211
|
-
score: evalData.score,
|
|
212
|
-
reasoning: evalData.reasoning,
|
|
213
|
-
};
|
|
214
|
-
});
|
|
215
|
-
|
|
216
|
-
return results;
|
|
217
|
-
}
|
|
218
|
-
}
|
package/src/api/judge/index.ts
DELETED
package/src/api/judge/types.ts
DELETED
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
import { LDAIMetrics } from '../metrics/LDAIMetrics';
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Structured response from AI models.
|
|
5
|
-
*/
|
|
6
|
-
export interface StructuredResponse {
|
|
7
|
-
/** The structured data returned by the model */
|
|
8
|
-
data: Record<string, unknown>;
|
|
9
|
-
|
|
10
|
-
/** The raw response from the model */
|
|
11
|
-
rawResponse: string;
|
|
12
|
-
|
|
13
|
-
/**
|
|
14
|
-
* Metrics information including success status and token usage.
|
|
15
|
-
*/
|
|
16
|
-
metrics: LDAIMetrics;
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
/**
|
|
20
|
-
* Score and reasoning for a single evaluation metric.
|
|
21
|
-
*/
|
|
22
|
-
export interface EvalScore {
|
|
23
|
-
/** Score between 0.0 and 1.0 indicating the evaluation result for this metric */
|
|
24
|
-
score: number;
|
|
25
|
-
/** Reasoning behind the provided score for this metric */
|
|
26
|
-
reasoning: string;
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
/**
|
|
30
|
-
* Response from a judge evaluation containing scores and reasoning for multiple metrics.
|
|
31
|
-
*/
|
|
32
|
-
export interface JudgeResponse {
|
|
33
|
-
/** The key of the judge configuration that was used to generate this response */
|
|
34
|
-
judgeConfigKey?: string;
|
|
35
|
-
/** Dictionary where keys are metric names and values contain score and reasoning */
|
|
36
|
-
evals: Record<string, EvalScore>;
|
|
37
|
-
/** Whether the evaluation completed successfully */
|
|
38
|
-
success: boolean;
|
|
39
|
-
/** Error message if evaluation failed */
|
|
40
|
-
error?: string;
|
|
41
|
-
}
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
import { LDTokenUsage } from './LDTokenUsage';
|
|
2
|
-
|
|
3
|
-
export function createBedrockTokenUsage(data: {
|
|
4
|
-
totalTokens?: number;
|
|
5
|
-
inputTokens?: number;
|
|
6
|
-
outputTokens?: number;
|
|
7
|
-
}): LDTokenUsage {
|
|
8
|
-
return {
|
|
9
|
-
total: data.totalTokens || 0,
|
|
10
|
-
input: data.inputTokens || 0,
|
|
11
|
-
output: data.outputTokens || 0,
|
|
12
|
-
};
|
|
13
|
-
}
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
import { LDTokenUsage } from './LDTokenUsage';
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Metrics information for AI operations that includes success status and token usage.
|
|
5
|
-
* This class combines success/failure tracking with token usage metrics.
|
|
6
|
-
*/
|
|
7
|
-
export interface LDAIMetrics {
|
|
8
|
-
/**
|
|
9
|
-
* Whether the AI operation was successful.
|
|
10
|
-
*/
|
|
11
|
-
success: boolean;
|
|
12
|
-
|
|
13
|
-
/**
|
|
14
|
-
* Token usage information for the operation.
|
|
15
|
-
* This will be undefined if no token usage data is available.
|
|
16
|
-
*/
|
|
17
|
-
usage?: LDTokenUsage;
|
|
18
|
-
}
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Information about token usage.
|
|
3
|
-
*/
|
|
4
|
-
export interface LDTokenUsage {
|
|
5
|
-
/**
|
|
6
|
-
* Combined token usage.
|
|
7
|
-
*/
|
|
8
|
-
total: number;
|
|
9
|
-
|
|
10
|
-
/**
|
|
11
|
-
* Number of tokens in the input.
|
|
12
|
-
*/
|
|
13
|
-
input: number;
|
|
14
|
-
|
|
15
|
-
/**
|
|
16
|
-
* Number of tokens in the output.
|
|
17
|
-
*/
|
|
18
|
-
output: number;
|
|
19
|
-
}
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
import { LDTokenUsage } from './LDTokenUsage';
|
|
2
|
-
|
|
3
|
-
export function createOpenAiUsage(data: {
|
|
4
|
-
total_tokens?: number;
|
|
5
|
-
prompt_tokens?: number;
|
|
6
|
-
completion_tokens?: number;
|
|
7
|
-
}): LDTokenUsage {
|
|
8
|
-
return {
|
|
9
|
-
total: data.total_tokens ?? 0,
|
|
10
|
-
input: data.prompt_tokens ?? 0,
|
|
11
|
-
output: data.completion_tokens ?? 0,
|
|
12
|
-
};
|
|
13
|
-
}
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
import { LDTokenUsage } from './LDTokenUsage';
|
|
2
|
-
|
|
3
|
-
export function createVercelAISDKTokenUsage(data: {
|
|
4
|
-
totalTokens?: number;
|
|
5
|
-
inputTokens?: number;
|
|
6
|
-
promptTokens?: number;
|
|
7
|
-
outputTokens?: number;
|
|
8
|
-
completionTokens?: number;
|
|
9
|
-
}): LDTokenUsage {
|
|
10
|
-
return {
|
|
11
|
-
total: data.totalTokens ?? 0,
|
|
12
|
-
input: data.inputTokens ?? data.promptTokens ?? 0,
|
|
13
|
-
output: data.outputTokens ?? data.completionTokens ?? 0,
|
|
14
|
-
};
|
|
15
|
-
}
|