@launchdarkly/server-sdk-ai 0.14.1 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. package/CHANGELOG.md +22 -0
  2. package/dist/index.cjs +1117 -0
  3. package/dist/index.cjs.map +1 -0
  4. package/dist/index.d.cts +1022 -0
  5. package/dist/index.d.ts +1022 -0
  6. package/dist/index.js +1071 -0
  7. package/dist/index.js.map +1 -0
  8. package/package.json +23 -6
  9. package/__tests__/Judge.test.ts +0 -521
  10. package/__tests__/LDAIClientImpl.test.ts +0 -594
  11. package/__tests__/LDAIConfigTrackerImpl.test.ts +0 -815
  12. package/__tests__/TokenUsage.test.ts +0 -119
  13. package/__tests__/TrackedChat.test.ts +0 -231
  14. package/dist/package.json +0 -53
  15. package/dist/src/LDAIClientImpl.d.ts +0 -39
  16. package/dist/src/LDAIClientImpl.d.ts.map +0 -1
  17. package/dist/src/LDAIClientImpl.js +0 -164
  18. package/dist/src/LDAIClientImpl.js.map +0 -1
  19. package/dist/src/LDAIConfigTrackerImpl.d.ts +0 -74
  20. package/dist/src/LDAIConfigTrackerImpl.d.ts.map +0 -1
  21. package/dist/src/LDAIConfigTrackerImpl.js +0 -207
  22. package/dist/src/LDAIConfigTrackerImpl.js.map +0 -1
  23. package/dist/src/LDClientMin.d.ts +0 -11
  24. package/dist/src/LDClientMin.d.ts.map +0 -1
  25. package/dist/src/LDClientMin.js +0 -3
  26. package/dist/src/LDClientMin.js.map +0 -1
  27. package/dist/src/api/LDAIClient.d.ts +0 -258
  28. package/dist/src/api/LDAIClient.d.ts.map +0 -1
  29. package/dist/src/api/LDAIClient.js +0 -3
  30. package/dist/src/api/LDAIClient.js.map +0 -1
  31. package/dist/src/api/chat/TrackedChat.d.ts +0 -72
  32. package/dist/src/api/chat/TrackedChat.d.ts.map +0 -1
  33. package/dist/src/api/chat/TrackedChat.js +0 -125
  34. package/dist/src/api/chat/TrackedChat.js.map +0 -1
  35. package/dist/src/api/chat/index.d.ts +0 -3
  36. package/dist/src/api/chat/index.d.ts.map +0 -1
  37. package/dist/src/api/chat/index.js +0 -19
  38. package/dist/src/api/chat/index.js.map +0 -1
  39. package/dist/src/api/chat/types.d.ts +0 -22
  40. package/dist/src/api/chat/types.d.ts.map +0 -1
  41. package/dist/src/api/chat/types.js +0 -3
  42. package/dist/src/api/chat/types.js.map +0 -1
  43. package/dist/src/api/config/LDAIConfigTracker.d.ts +0 -209
  44. package/dist/src/api/config/LDAIConfigTracker.d.ts.map +0 -1
  45. package/dist/src/api/config/LDAIConfigTracker.js +0 -3
  46. package/dist/src/api/config/LDAIConfigTracker.js.map +0 -1
  47. package/dist/src/api/config/LDAIConfigUtils.d.ts +0 -2
  48. package/dist/src/api/config/LDAIConfigUtils.d.ts.map +0 -1
  49. package/dist/src/api/config/LDAIConfigUtils.js +0 -145
  50. package/dist/src/api/config/LDAIConfigUtils.js.map +0 -1
  51. package/dist/src/api/config/index.d.ts +0 -3
  52. package/dist/src/api/config/index.d.ts.map +0 -1
  53. package/dist/src/api/config/index.js +0 -18
  54. package/dist/src/api/config/index.js.map +0 -1
  55. package/dist/src/api/config/types.d.ts +0 -206
  56. package/dist/src/api/config/types.d.ts.map +0 -1
  57. package/dist/src/api/config/types.js +0 -3
  58. package/dist/src/api/config/types.js.map +0 -1
  59. package/dist/src/api/index.d.ts +0 -7
  60. package/dist/src/api/index.d.ts.map +0 -1
  61. package/dist/src/api/index.js +0 -23
  62. package/dist/src/api/index.js.map +0 -1
  63. package/dist/src/api/judge/EvaluationSchemaBuilder.d.ts +0 -11
  64. package/dist/src/api/judge/EvaluationSchemaBuilder.d.ts.map +0 -1
  65. package/dist/src/api/judge/EvaluationSchemaBuilder.js +0 -52
  66. package/dist/src/api/judge/EvaluationSchemaBuilder.js.map +0 -1
  67. package/dist/src/api/judge/Judge.d.ts +0 -63
  68. package/dist/src/api/judge/Judge.d.ts.map +0 -1
  69. package/dist/src/api/judge/Judge.js +0 -151
  70. package/dist/src/api/judge/Judge.js.map +0 -1
  71. package/dist/src/api/judge/index.d.ts +0 -3
  72. package/dist/src/api/judge/index.d.ts.map +0 -1
  73. package/dist/src/api/judge/index.js +0 -6
  74. package/dist/src/api/judge/index.js.map +0 -1
  75. package/dist/src/api/judge/types.d.ts +0 -37
  76. package/dist/src/api/judge/types.d.ts.map +0 -1
  77. package/dist/src/api/judge/types.js +0 -3
  78. package/dist/src/api/judge/types.js.map +0 -1
  79. package/dist/src/api/metrics/BedrockTokenUsage.d.ts +0 -7
  80. package/dist/src/api/metrics/BedrockTokenUsage.d.ts.map +0 -1
  81. package/dist/src/api/metrics/BedrockTokenUsage.js +0 -12
  82. package/dist/src/api/metrics/BedrockTokenUsage.js.map +0 -1
  83. package/dist/src/api/metrics/LDAIMetrics.d.ts +0 -17
  84. package/dist/src/api/metrics/LDAIMetrics.d.ts.map +0 -1
  85. package/dist/src/api/metrics/LDAIMetrics.js +0 -3
  86. package/dist/src/api/metrics/LDAIMetrics.js.map +0 -1
  87. package/dist/src/api/metrics/LDFeedbackKind.d.ts +0 -14
  88. package/dist/src/api/metrics/LDFeedbackKind.d.ts.map +0 -1
  89. package/dist/src/api/metrics/LDFeedbackKind.js +0 -18
  90. package/dist/src/api/metrics/LDFeedbackKind.js.map +0 -1
  91. package/dist/src/api/metrics/LDTokenUsage.d.ts +0 -18
  92. package/dist/src/api/metrics/LDTokenUsage.d.ts.map +0 -1
  93. package/dist/src/api/metrics/LDTokenUsage.js +0 -3
  94. package/dist/src/api/metrics/LDTokenUsage.js.map +0 -1
  95. package/dist/src/api/metrics/OpenAiUsage.d.ts +0 -7
  96. package/dist/src/api/metrics/OpenAiUsage.d.ts.map +0 -1
  97. package/dist/src/api/metrics/OpenAiUsage.js +0 -13
  98. package/dist/src/api/metrics/OpenAiUsage.js.map +0 -1
  99. package/dist/src/api/metrics/VercelAISDKTokenUsage.d.ts +0 -9
  100. package/dist/src/api/metrics/VercelAISDKTokenUsage.d.ts.map +0 -1
  101. package/dist/src/api/metrics/VercelAISDKTokenUsage.js +0 -13
  102. package/dist/src/api/metrics/VercelAISDKTokenUsage.js.map +0 -1
  103. package/dist/src/api/metrics/index.d.ts +0 -7
  104. package/dist/src/api/metrics/index.d.ts.map +0 -1
  105. package/dist/src/api/metrics/index.js +0 -23
  106. package/dist/src/api/metrics/index.js.map +0 -1
  107. package/dist/src/api/providers/AIProvider.d.ts +0 -52
  108. package/dist/src/api/providers/AIProvider.d.ts.map +0 -1
  109. package/dist/src/api/providers/AIProvider.js +0 -88
  110. package/dist/src/api/providers/AIProvider.js.map +0 -1
  111. package/dist/src/api/providers/AIProviderFactory.d.ts +0 -39
  112. package/dist/src/api/providers/AIProviderFactory.d.ts.map +0 -1
  113. package/dist/src/api/providers/AIProviderFactory.js +0 -102
  114. package/dist/src/api/providers/AIProviderFactory.js.map +0 -1
  115. package/dist/src/api/providers/index.d.ts +0 -3
  116. package/dist/src/api/providers/index.d.ts.map +0 -1
  117. package/dist/src/api/providers/index.js +0 -19
  118. package/dist/src/api/providers/index.js.map +0 -1
  119. package/dist/src/index.d.ts +0 -19
  120. package/dist/src/index.d.ts.map +0 -1
  121. package/dist/src/index.js +0 -29
  122. package/dist/src/index.js.map +0 -1
  123. package/docs/.nojekyll +0 -1
  124. package/docs/assets/highlight.css +0 -92
  125. package/docs/assets/main.js +0 -58
  126. package/docs/assets/search.js +0 -1
  127. package/docs/assets/style.css +0 -1379
  128. package/docs/classes/AIProvider.html +0 -210
  129. package/docs/classes/AIProviderFactory.html +0 -208
  130. package/docs/classes/Judge.html +0 -322
  131. package/docs/classes/TrackedChat.html +0 -322
  132. package/docs/enums/LDFeedbackKind.html +0 -115
  133. package/docs/functions/createBedrockTokenUsage.html +0 -94
  134. package/docs/functions/createOpenAiUsage.html +0 -94
  135. package/docs/functions/createVercelAISDKTokenUsage.html +0 -98
  136. package/docs/functions/initAi.html +0 -93
  137. package/docs/index.html +0 -136
  138. package/docs/interfaces/ChatResponse.html +0 -130
  139. package/docs/interfaces/EvalScore.html +0 -119
  140. package/docs/interfaces/JudgeResponse.html +0 -139
  141. package/docs/interfaces/LDAIAgentConfig.html +0 -178
  142. package/docs/interfaces/LDAIAgentConfigDefault.html +0 -155
  143. package/docs/interfaces/LDAIAgentRequestConfig.html +0 -129
  144. package/docs/interfaces/LDAIClient.html +0 -449
  145. package/docs/interfaces/LDAICompletionConfig.html +0 -178
  146. package/docs/interfaces/LDAICompletionConfigDefault.html +0 -155
  147. package/docs/interfaces/LDAIConfig.html +0 -158
  148. package/docs/interfaces/LDAIConfigDefault.html +0 -133
  149. package/docs/interfaces/LDAIConfigTracker.html +0 -530
  150. package/docs/interfaces/LDAIJudgeConfig.html +0 -178
  151. package/docs/interfaces/LDAIJudgeConfigDefault.html +0 -155
  152. package/docs/interfaces/LDAIMetrics.html +0 -121
  153. package/docs/interfaces/LDJudge.html +0 -119
  154. package/docs/interfaces/LDJudgeConfiguration.html +0 -109
  155. package/docs/interfaces/LDLogger.html +0 -189
  156. package/docs/interfaces/LDMessage.html +0 -119
  157. package/docs/interfaces/LDModelConfig.html +0 -139
  158. package/docs/interfaces/LDProviderConfig.html +0 -105
  159. package/docs/interfaces/LDTokenUsage.html +0 -129
  160. package/docs/interfaces/StructuredResponse.html +0 -129
  161. package/docs/types/LDAIConfigDefaultKind.html +0 -81
  162. package/docs/types/LDAIConfigKind.html +0 -81
  163. package/docs/types/LDAIConfigMode.html +0 -81
  164. package/docs/types/SupportedAIProvider.html +0 -81
  165. package/docs/variables/SUPPORTED_AI_PROVIDERS.html +0 -81
  166. package/jest.config.js +0 -7
  167. package/src/LDAIClientImpl.ts +0 -327
  168. package/src/LDAIConfigTrackerImpl.ts +0 -288
  169. package/src/LDClientMin.ts +0 -18
  170. package/src/api/LDAIClient.ts +0 -325
  171. package/src/api/chat/TrackedChat.ts +0 -163
  172. package/src/api/chat/index.ts +0 -2
  173. package/src/api/chat/types.ts +0 -24
  174. package/src/api/config/LDAIConfigTracker.ts +0 -238
  175. package/src/api/config/LDAIConfigUtils.ts +0 -212
  176. package/src/api/config/index.ts +0 -3
  177. package/src/api/config/types.ts +0 -260
  178. package/src/api/index.ts +0 -6
  179. package/src/api/judge/EvaluationSchemaBuilder.ts +0 -54
  180. package/src/api/judge/Judge.ts +0 -218
  181. package/src/api/judge/index.ts +0 -2
  182. package/src/api/judge/types.ts +0 -41
  183. package/src/api/metrics/BedrockTokenUsage.ts +0 -13
  184. package/src/api/metrics/LDAIMetrics.ts +0 -18
  185. package/src/api/metrics/LDFeedbackKind.ts +0 -13
  186. package/src/api/metrics/LDTokenUsage.ts +0 -19
  187. package/src/api/metrics/OpenAiUsage.ts +0 -13
  188. package/src/api/metrics/VercelAISDKTokenUsage.ts +0 -15
  189. package/src/api/metrics/index.ts +0 -6
  190. package/src/api/providers/AIProvider.ts +0 -94
  191. package/src/api/providers/AIProviderFactory.ts +0 -152
  192. package/src/api/providers/index.ts +0 -2
  193. package/src/index.ts +0 -24
  194. package/tsconfig.eslint.json +0 -5
  195. package/tsconfig.json +0 -21
  196. package/tsconfig.ref.json +0 -7
  197. package/typedoc.json +0 -5
@@ -1,260 +0,0 @@
1
- import { LDAIConfigTracker } from './LDAIConfigTracker';
2
-
3
- // ============================================================================
4
- // Foundation Types
5
- // ============================================================================
6
-
/**
 * A single prompt message that is part of an AI Config.
 */
export interface LDMessage {
  /**
   * Which participant the message is attributed to.
   */
  role: 'system' | 'user' | 'assistant';
  /**
   * The text of the message.
   */
  content: string;
}
20
-
/**
 * Describes the model an AI Config targets.
 */
export interface LDModelConfig {
  /**
   * Identifier of the model.
   */
  name: string;

  /**
   * Parameters that are specific to the model, keyed by parameter name.
   */
  parameters?: Record<string, unknown>;

  /**
   * Extra parameters supplied by the user, keyed by parameter name.
   */
  custom?: Record<string, unknown>;
}
40
-
/**
 * Describes the provider an AI Config targets.
 */
export interface LDProviderConfig {
  /**
   * Name identifying the provider.
   */
  name: string;
}
47
-
48
- // ============================================================================
49
- // Judge Types
50
- // ============================================================================
51
-
/**
 * Describes one judge attached to an AI Config.
 */
export interface LDJudge {
  /** Key of the AI Config that acts as the judge. */
  key: string;
  /** Evaluation sampling rate, from 0.0 to 1.0. */
  samplingRate: number;
}
61
-
/**
 * The set of judges attached to an AI Config.
 */
export interface LDJudgeConfiguration {
  /** The attached judges. */
  judges: Array<LDJudge>;
}
69
-
70
- // ============================================================================
71
- // Base AI Config Types
72
- // ============================================================================
73
-
/**
 * Fields shared by every default (fallback) AI Config. Unlike a resolved AI Config,
 * `enabled` may be omitted here.
 */
export interface LDAIConfigDefault {
  /**
   * Model configuration, if any.
   */
  model?: LDModelConfig;

  /**
   * Provider configuration, if any.
   */
  provider?: LDProviderConfig;

  /**
   * Whether the configuration is enabled. Treated as false when omitted.
   */
  enabled?: boolean;
}
93
-
/**
 * Fields shared by every resolved AI Config, independent of its mode. Inherits the
 * optional model/provider fields from the default shape but makes `enabled` mandatory.
 */
export interface LDAIConfig extends Omit<LDAIConfigDefault, 'enabled'> {
  /**
   * Key identifying the AI Config.
   */
  key: string;
  /**
   * Whether the configuration is enabled.
   */
  enabled: boolean;

  /**
   * Tracker used to generate analytics for this config.
   * Left undefined when the config is disabled.
   */
  tracker?: LDAIConfigTracker;
}
113
-
114
- // ============================================================================
115
- // Default AI Config Implementation Types
116
- // ============================================================================
117
-
/**
 * Default (fallback) AI Config for agent mode.
 */
export interface LDAIAgentConfigDefault extends LDAIConfigDefault {
  /**
   * The instructions given to the agent.
   */
  instructions?: string;
  /**
   * Judges attached to this AI Config, i.e. the judge AI Configs
   * that should evaluate it.
   */
  judgeConfiguration?: LDJudgeConfiguration;
}
132
-
/**
 * Default (fallback) AI Config for completion mode, which is the default mode.
 */
export interface LDAICompletionConfigDefault extends LDAIConfigDefault {
  /**
   * Prompt messages for the completion, if any.
   */
  messages?: Array<LDMessage>;
  /**
   * Judges attached to this AI Config, i.e. the judge AI Configs
   * that should evaluate it.
   */
  judgeConfiguration?: LDJudgeConfiguration;
}
147
-
/**
 * Default (fallback) AI Config for judge mode. The evaluation metric keys are
 * optional in this default shape.
 */
export interface LDAIJudgeConfigDefault extends LDAIConfigDefault {
  /**
   * Prompt messages for the judge, if any.
   */
  messages?: Array<LDMessage>;
  /**
   * Keys of the metrics that this judge can evaluate.
   */
  evaluationMetricKeys?: Array<string>;
}
162
-
/**
 * Any of the default (fallback) AI Config shapes.
 */
export type LDAIConfigDefaultKind =
  | LDAICompletionConfigDefault
  | LDAIAgentConfigDefault
  | LDAIJudgeConfigDefault;
170
-
171
- // ============================================================================
172
- // AI Config Implementation Types
173
- // ============================================================================
174
-
/**
 * Resolved AI Config for agent mode.
 */
export interface LDAIAgentConfig extends LDAIConfig {
  /**
   * The instructions given to the agent.
   */
  instructions?: string;
  /**
   * Judges attached to this AI Config, i.e. the judge AI Configs
   * that should evaluate it.
   */
  judgeConfiguration?: LDJudgeConfiguration;
}
189
-
/**
 * Resolved AI Config for completion mode, which is the default mode.
 */
export interface LDAICompletionConfig extends LDAIConfig {
  /**
   * Prompt messages for the completion, if any.
   */
  messages?: Array<LDMessage>;
  /**
   * Judges attached to this AI Config, i.e. the judge AI Configs
   * that should evaluate it.
   */
  judgeConfiguration?: LDJudgeConfiguration;
}
204
-
/**
 * Resolved AI Config for judge mode. Unlike the default shape, the evaluation
 * metric keys are mandatory here.
 */
export interface LDAIJudgeConfig extends LDAIConfig {
  /**
   * Prompt messages for the judge, if any.
   */
  messages?: Array<LDMessage>;
  /**
   * Keys of the metrics that this judge can evaluate.
   */
  evaluationMetricKeys: Array<string>;
}
219
-
220
- // ============================================================================
221
- // Union Types
222
- // ============================================================================
223
-
/**
 * Any of the resolved AI Config shapes.
 */
export type LDAIConfigKind =
  | LDAICompletionConfig
  | LDAIAgentConfig
  | LDAIJudgeConfig;
228
-
229
- // ============================================================================
230
- // Agent-Specific Request Type
231
- // ============================================================================
232
-
/**
 * Describes one agent to be requested.
 */
export interface LDAIAgentRequestConfig {
  /**
   * Key of the agent to retrieve.
   */
  key: string;

  /**
   * Default configuration for the agent.
   */
  defaultValue: LDAIAgentConfigDefault;

  /**
   * Values used when interpolating the agent's instructions.
   */
  variables?: { [name: string]: unknown };
}
252
-
253
- // ============================================================================
254
- // Mode Type
255
- // ============================================================================
256
-
/**
 * The modes an AI configuration can have.
 */
export type LDAIConfigMode = 'agent' | 'completion' | 'judge';
package/src/api/index.ts DELETED
@@ -1,6 +0,0 @@
1
- export * from './config';
2
- export * from './chat';
3
- export * from './judge';
4
- export * from './metrics';
5
- export * from './LDAIClient';
6
- export * from './providers';
@@ -1,54 +0,0 @@
/**
 * Internal class for building the dynamic response schema a judge asks its model to fill in.
 * Not part of the public API - it is used by Judge to describe the expected structured output.
 */
class EvaluationSchemaBuilder {
  /**
   * Builds the schema for a structured evaluation response.
   *
   * The result describes an object with a single required `evaluations` property, which in
   * turn has one required `{ score, reasoning }` entry per metric key.
   *
   * @param evaluationMetricKeys Keys of the metrics to evaluate. Duplicates are ignored.
   * @returns A new schema object that shares no state with `evaluationMetricKeys`.
   */
  static build(evaluationMetricKeys: string[]): Record<string, unknown> {
    // JSON Schema requires the entries of `required` to be unique, and the schema must not
    // alias the caller's array (a later mutation of either would silently change the other).
    const metricKeys = [...new Set(evaluationMetricKeys)];

    return {
      type: 'object',
      properties: {
        evaluations: {
          type: 'object',
          description: `Object containing evaluation results for ${metricKeys.join(', ')} metrics`,
          properties: EvaluationSchemaBuilder._buildKeyProperties(metricKeys),
          required: metricKeys,
          additionalProperties: false,
        },
      },
      required: ['evaluations'],
      additionalProperties: false,
    };
  }

  /**
   * Maps every metric key to the schema of its evaluation entry.
   */
  private static _buildKeyProperties(evaluationMetricKeys: string[]): Record<string, unknown> {
    const properties: Record<string, unknown> = {};
    for (const key of evaluationMetricKeys) {
      properties[key] = EvaluationSchemaBuilder._buildKeySchema(key);
    }
    return properties;
  }

  /**
   * Builds the schema of a single evaluation entry: a score in [0, 1] plus its reasoning.
   */
  private static _buildKeySchema(key: string) {
    return {
      type: 'object',
      properties: {
        score: {
          type: 'number',
          minimum: 0,
          maximum: 1,
          description: `Score between 0.0 and 1.0 for ${key}`,
        },
        reasoning: {
          type: 'string',
          description: `Reasoning behind the score for ${key}`,
        },
      },
      required: ['score', 'reasoning'],
      additionalProperties: false,
    };
  }
}

export { EvaluationSchemaBuilder };
@@ -1,218 +0,0 @@
1
- import * as Mustache from 'mustache';
2
-
3
- import { LDLogger } from '@launchdarkly/js-server-sdk-common';
4
-
5
- import { ChatResponse } from '../chat/types';
6
- import { LDAIConfigTracker } from '../config/LDAIConfigTracker';
7
- import { LDAIJudgeConfig, LDMessage } from '../config/types';
8
- import { AIProvider } from '../providers/AIProvider';
9
- import { EvaluationSchemaBuilder } from './EvaluationSchemaBuilder';
10
- import { EvalScore, JudgeResponse, StructuredResponse } from './types';
11
-
/**
 * Judge implementation that handles evaluation functionality and conversation management.
 *
 * According to the AIEval spec, judges are AI Configs with mode: "judge" that evaluate
 * other AI Configs using structured output.
 */
export class Judge {
  private readonly _logger?: LDLogger;
  private readonly _evaluationResponseStructure: Record<string, unknown>;

  /**
   * @param _aiConfig The judge AI Config (messages and evaluation metric keys)
   * @param _aiConfigTracker Tracker used to record metrics for the judge's model invocation
   * @param _aiProvider Provider used to invoke the model with a structured response schema
   * @param logger Optional logger for warnings, debug output, and errors
   */
  constructor(
    private readonly _aiConfig: LDAIJudgeConfig,
    private readonly _aiConfigTracker: LDAIConfigTracker,
    private readonly _aiProvider: AIProvider,
    logger?: LDLogger,
  ) {
    this._logger = logger;
    // evaluate() tolerates a config whose metric keys are missing at runtime (it warns and
    // returns undefined), so construction must not throw on such a config either.
    this._evaluationResponseStructure = EvaluationSchemaBuilder.build(
      this._aiConfig.evaluationMetricKeys ?? [],
    );
  }

  /**
   * Evaluates an AI response using the judge's configuration.
   *
   * @param input The input prompt or question that was provided to the AI
   * @param output The AI-generated response to be evaluated
   * @param samplingRate Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1)
   * @returns Promise that resolves to the evaluation results; to undefined when the evaluation
   * was not sampled or the judge configuration lacks metric keys or messages; or to an
   * unsuccessful response carrying an error message when the evaluation threw
   */
  async evaluate(
    input: string,
    output: string,
    samplingRate: number = 1,
  ): Promise<JudgeResponse | undefined> {
    try {
      const metricKeys = this._aiConfig.evaluationMetricKeys;

      if (!metricKeys || metricKeys.length === 0) {
        this._logger?.warn(
          'Judge configuration is missing required evaluationMetricKeys',
          this._aiConfigTracker.getTrackData(),
        );
        return undefined;
      }

      if (!this._aiConfig.messages) {
        this._logger?.warn(
          'Judge configuration must include messages',
          this._aiConfigTracker.getTrackData(),
        );
        return undefined;
      }

      // Math.random() yields a value in [0, 1). Using >= means a rate of 0 never samples
      // (even when the random value is exactly 0) while a rate of 1 always samples.
      if (Math.random() >= samplingRate) {
        this._logger?.debug(`Judge evaluation skipped due to sampling rate: ${samplingRate}`);
        return undefined;
      }

      const messages = this._constructEvaluationMessages(input, output);

      const response = await this._aiConfigTracker.trackMetricsOf(
        (result: StructuredResponse) => result.metrics,
        () => this._aiProvider.invokeStructuredModel(messages, this._evaluationResponseStructure),
      );

      let { success } = response.metrics;

      const evals = this._parseEvaluationResponse(response.data);

      // Results are keyed by metric, so a key listed twice still yields a single entry;
      // compare against the number of distinct keys.
      if (Object.keys(evals).length !== new Set(metricKeys).size) {
        this._logger?.warn(
          'Judge evaluation did not return all evaluations',
          this._aiConfigTracker.getTrackData(),
        );
        success = false;
      }

      return {
        evals,
        success,
        judgeConfigKey: this._aiConfig.key,
      };
    } catch (error) {
      this._logger?.error('Judge evaluation failed:', error);
      return {
        evals: {},
        success: false,
        error: error instanceof Error ? error.message : 'Unknown error',
        judgeConfigKey: this._aiConfig.key,
      };
    }
  }

  /**
   * Evaluates an AI response from chat messages and response.
   *
   * The contents of all messages are joined with CRLF to form the evaluation input.
   *
   * @param messages Array of messages representing the conversation history
   * @param response The AI response to be evaluated
   * @param samplingRatio Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1)
   * @returns Promise that resolves to evaluation results or undefined if not sampled
   */
  async evaluateMessages(
    messages: LDMessage[],
    response: ChatResponse,
    samplingRatio: number = 1,
  ): Promise<JudgeResponse | undefined> {
    const input = messages.length === 0 ? '' : messages.map((msg) => msg.content).join('\r\n');
    const output = response.message.content;

    return this.evaluate(input, output, samplingRatio);
  }

  /**
   * Returns the AI Config used by this judge.
   */
  getAIConfig(): LDAIJudgeConfig {
    return this._aiConfig;
  }

  /**
   * Returns the tracker associated with this judge.
   */
  getTracker(): LDAIConfigTracker {
    return this._aiConfigTracker;
  }

  /**
   * Returns the AI provider used by this judge.
   */
  getProvider(): AIProvider {
    return this._aiProvider;
  }

  /**
   * Constructs evaluation messages by interpolating the input (as `message_history`) and the
   * output (as `response_to_evaluate`) into each of the judge's configured messages.
   */
  private _constructEvaluationMessages(input: string, output: string): LDMessage[] {
    // evaluate() only calls this after checking that messages exist; the fallback simply
    // avoids a non-null assertion.
    const configured = this._aiConfig.messages ?? [];

    return configured.map((msg) => ({
      ...msg,
      content: this._interpolateMessage(msg.content, {
        message_history: input,
        response_to_evaluate: output,
      }),
    }));
  }

  /**
   * Interpolates message content with variables using Mustache templating.
   * The escape function is the identity, so interpolated values are inserted unescaped.
   */
  private _interpolateMessage(content: string, variables: Record<string, string>): string {
    return Mustache.render(content, variables, undefined, { escape: (item: any) => item });
  }

  /**
   * Parses the structured evaluation response from the AI provider.
   *
   * Only metrics whose entry has a numeric score within [0, 1] and a string reasoning are
   * included; every rejected entry is logged as a warning and left out of the result.
   */
  private _parseEvaluationResponse(data: Record<string, unknown>): Record<string, EvalScore> {
    const results: Record<string, EvalScore> = {};

    if (!data.evaluations || typeof data.evaluations !== 'object') {
      this._logger?.warn('Invalid response: missing or invalid evaluations object');
      return results;
    }

    const evaluations = data.evaluations as Record<string, unknown>;

    this._aiConfig.evaluationMetricKeys.forEach((metricKey) => {
      const evaluation = evaluations[metricKey];

      if (!evaluation || typeof evaluation !== 'object') {
        this._logger?.warn(
          `Missing evaluation for metric key: ${metricKey}`,
          this._aiConfigTracker.getTrackData(),
        );
        return;
      }

      const evalData = evaluation as Record<string, unknown>;

      if (typeof evalData.score !== 'number' || evalData.score < 0 || evalData.score > 1) {
        this._logger?.warn(
          `Invalid score evaluated for ${metricKey}: ${evalData.score}. Score must be a number between 0 and 1 inclusive`,
          this._aiConfigTracker.getTrackData(),
        );
        return;
      }

      if (typeof evalData.reasoning !== 'string') {
        this._logger?.warn(
          `Invalid reasoning evaluated for ${metricKey}: ${evalData.reasoning}. Reasoning must be a string`,
          this._aiConfigTracker.getTrackData(),
        );
        return;
      }

      results[metricKey] = {
        score: evalData.score,
        reasoning: evalData.reasoning,
      };
    });

    return results;
  }
}
@@ -1,2 +0,0 @@
1
- export { Judge } from './Judge';
2
- export type { EvalScore, JudgeResponse, StructuredResponse } from './types';
@@ -1,41 +0,0 @@
1
- import { LDAIMetrics } from '../metrics/LDAIMetrics';
2
-
/**
 * The result of asking an AI model for structured output.
 */
export interface StructuredResponse {
  /** Structured data produced by the model. */
  data: Record<string, unknown>;

  /** The model's raw response. */
  rawResponse: string;

  /**
   * Metrics for the invocation, including success status and token usage.
   */
  metrics: LDAIMetrics;
}
18
-
/**
 * The outcome of evaluating a single metric: a score plus the reasoning for it.
 */
export interface EvalScore {
  /** Evaluation result for this metric, as a score between 0.0 and 1.0 */
  score: number;
  /** Explanation of why this score was given */
  reasoning: string;
}
28
-
/**
 * The outcome of a judge evaluation: per-metric scores and reasoning plus overall status.
 */
export interface JudgeResponse {
  /** Key of the judge configuration that produced this response */
  judgeConfigKey?: string;
  /** Score and reasoning for each metric, keyed by metric name */
  evals: Record<string, EvalScore>;
  /** True when the evaluation completed successfully */
  success: boolean;
  /** Error message, present when the evaluation failed */
  error?: string;
}
@@ -1,13 +0,0 @@
1
- import { LDTokenUsage } from './LDTokenUsage';
2
-
/**
 * Builds an LDTokenUsage from token counts that use the
 * `totalTokens` / `inputTokens` / `outputTokens` field names.
 *
 * @param data The token counts; every field is optional.
 * @returns The token usage, where any missing (or otherwise falsy) count is reported as 0.
 */
export function createBedrockTokenUsage(data: {
  totalTokens?: number;
  inputTokens?: number;
  outputTokens?: number;
}): LDTokenUsage {
  const total = data.totalTokens || 0;
  const input = data.inputTokens || 0;
  const output = data.outputTokens || 0;

  return { total, input, output };
}
@@ -1,18 +0,0 @@
1
- import { LDTokenUsage } from './LDTokenUsage';
2
-
/**
 * Metrics for an AI operation: whether it succeeded, together with its token
 * usage when that is known.
 */
export interface LDAIMetrics {
  /**
   * True when the AI operation succeeded.
   */
  success: boolean;

  /**
   * Token usage for the operation.
   * Undefined when no token usage data is available.
   */
  usage?: LDTokenUsage;
}
@@ -1,13 +0,0 @@
/**
 * The kinds of feedback that can be given about generated content.
 */
export enum LDFeedbackKind {
  /**
   * The sentiment is positive.
   */
  Positive = 'positive',
  /**
   * The sentiment is negative.
   */
  Negative = 'negative',
}
@@ -1,19 +0,0 @@
/**
 * Token counts for an AI operation.
 */
export interface LDTokenUsage {
  /**
   * The combined token count.
   */
  total: number;

  /**
   * The number of input tokens.
   */
  input: number;

  /**
   * The number of output tokens.
   */
  output: number;
}
@@ -1,13 +0,0 @@
1
- import { LDTokenUsage } from './LDTokenUsage';
2
-
/**
 * Builds an LDTokenUsage from token counts that use the
 * `total_tokens` / `prompt_tokens` / `completion_tokens` field names.
 *
 * @param data The token counts; every field is optional.
 * @returns The token usage, where any null or undefined count is reported as 0.
 */
export function createOpenAiUsage(data: {
  total_tokens?: number;
  prompt_tokens?: number;
  completion_tokens?: number;
}): LDTokenUsage {
  const total = data.total_tokens ?? 0;
  const input = data.prompt_tokens ?? 0;
  const output = data.completion_tokens ?? 0;

  return { total, input, output };
}
@@ -1,15 +0,0 @@
1
- import { LDTokenUsage } from './LDTokenUsage';
2
-
/**
 * Builds an LDTokenUsage from token counts that use either the
 * `inputTokens` / `outputTokens` or the `promptTokens` / `completionTokens` field names.
 *
 * @param data The token counts; every field is optional. When both spellings of a count are
 * present, `inputTokens` wins over `promptTokens` and `outputTokens` over `completionTokens`.
 * @returns The token usage, where any count that is absent under both names is reported as 0.
 */
export function createVercelAISDKTokenUsage(data: {
  totalTokens?: number;
  inputTokens?: number;
  promptTokens?: number;
  outputTokens?: number;
  completionTokens?: number;
}): LDTokenUsage {
  const total = data.totalTokens ?? 0;
  const input = data.inputTokens ?? data.promptTokens ?? 0;
  const output = data.outputTokens ?? data.completionTokens ?? 0;

  return { total, input, output };
}
@@ -1,6 +0,0 @@
1
- export * from './BedrockTokenUsage';
2
- export * from './OpenAiUsage';
3
- export * from './LDFeedbackKind';
4
- export * from './LDAIMetrics';
5
- export * from './LDTokenUsage';
6
- export * from './VercelAISDKTokenUsage';