@launchdarkly/server-sdk-ai 0.12.3 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. package/CHANGELOG.md +44 -0
  2. package/README.md +2 -2
  3. package/__tests__/Judge.test.ts +496 -0
  4. package/__tests__/LDAIClientImpl.test.ts +530 -323
  5. package/__tests__/LDAIConfigTrackerImpl.test.ts +50 -290
  6. package/__tests__/TrackedChat.test.ts +4 -5
  7. package/dist/package.json +53 -0
  8. package/dist/src/LDAIClientImpl.d.ts +39 -0
  9. package/dist/src/LDAIClientImpl.d.ts.map +1 -0
  10. package/dist/src/LDAIClientImpl.js +164 -0
  11. package/dist/src/LDAIClientImpl.js.map +1 -0
  12. package/dist/{LDAIConfigTrackerImpl.d.ts → src/LDAIConfigTrackerImpl.d.ts} +13 -11
  13. package/dist/src/LDAIConfigTrackerImpl.d.ts.map +1 -0
  14. package/dist/{LDAIConfigTrackerImpl.js → src/LDAIConfigTrackerImpl.js} +59 -44
  15. package/dist/src/LDAIConfigTrackerImpl.js.map +1 -0
  16. package/dist/src/LDClientMin.d.ts.map +1 -0
  17. package/dist/{LDClientMin.js.map → src/LDClientMin.js.map} +1 -1
  18. package/dist/src/api/LDAIClient.d.ts +258 -0
  19. package/dist/src/api/LDAIClient.d.ts.map +1 -0
  20. package/dist/{api → src/api}/LDAIClient.js.map +1 -1
  21. package/dist/{api → src/api}/chat/TrackedChat.d.ts +22 -4
  22. package/dist/src/api/chat/TrackedChat.d.ts.map +1 -0
  23. package/dist/{api → src/api}/chat/TrackedChat.js +43 -2
  24. package/dist/src/api/chat/TrackedChat.js.map +1 -0
  25. package/dist/src/api/chat/index.d.ts.map +1 -0
  26. package/dist/src/api/chat/index.js.map +1 -0
  27. package/dist/src/api/chat/types.d.ts +22 -0
  28. package/dist/src/api/chat/types.d.ts.map +1 -0
  29. package/dist/{api → src/api}/chat/types.js.map +1 -1
  30. package/dist/{api → src/api}/config/LDAIConfigTracker.d.ts +41 -22
  31. package/dist/src/api/config/LDAIConfigTracker.d.ts.map +1 -0
  32. package/dist/src/api/config/LDAIConfigTracker.js.map +1 -0
  33. package/dist/src/api/config/LDAIConfigUtils.d.ts +2 -0
  34. package/dist/src/api/config/LDAIConfigUtils.d.ts.map +1 -0
  35. package/dist/src/api/config/LDAIConfigUtils.js +141 -0
  36. package/dist/src/api/config/LDAIConfigUtils.js.map +1 -0
  37. package/dist/src/api/config/index.d.ts +3 -0
  38. package/dist/src/api/config/index.d.ts.map +1 -0
  39. package/dist/{api/agents → src/api/config}/index.js +1 -1
  40. package/dist/src/api/config/index.js.map +1 -0
  41. package/dist/src/api/config/types.d.ts +202 -0
  42. package/dist/src/api/config/types.d.ts.map +1 -0
  43. package/dist/{api/agents/LDAIAgent.js → src/api/config/types.js} +1 -1
  44. package/dist/src/api/config/types.js.map +1 -0
  45. package/dist/{api → src/api}/index.d.ts +1 -1
  46. package/dist/src/api/index.d.ts.map +1 -0
  47. package/dist/{api → src/api}/index.js +1 -1
  48. package/dist/src/api/index.js.map +1 -0
  49. package/dist/src/api/judge/EvaluationSchemaBuilder.d.ts +11 -0
  50. package/dist/src/api/judge/EvaluationSchemaBuilder.d.ts.map +1 -0
  51. package/dist/src/api/judge/EvaluationSchemaBuilder.js +52 -0
  52. package/dist/src/api/judge/EvaluationSchemaBuilder.js.map +1 -0
  53. package/dist/src/api/judge/Judge.d.ts +63 -0
  54. package/dist/src/api/judge/Judge.d.ts.map +1 -0
  55. package/dist/src/api/judge/Judge.js +149 -0
  56. package/dist/src/api/judge/Judge.js.map +1 -0
  57. package/dist/src/api/judge/index.d.ts +3 -0
  58. package/dist/src/api/judge/index.d.ts.map +1 -0
  59. package/dist/src/api/judge/index.js +6 -0
  60. package/dist/src/api/judge/index.js.map +1 -0
  61. package/dist/src/api/judge/types.d.ts +35 -0
  62. package/dist/src/api/judge/types.d.ts.map +1 -0
  63. package/dist/{api/config/LDAIConfig.js → src/api/judge/types.js} +1 -1
  64. package/dist/src/api/judge/types.js.map +1 -0
  65. package/dist/src/api/metrics/BedrockTokenUsage.d.ts.map +1 -0
  66. package/dist/src/api/metrics/BedrockTokenUsage.js.map +1 -0
  67. package/dist/src/api/metrics/LDAIMetrics.d.ts.map +1 -0
  68. package/dist/src/api/metrics/LDAIMetrics.js.map +1 -0
  69. package/dist/src/api/metrics/LDFeedbackKind.d.ts.map +1 -0
  70. package/dist/src/api/metrics/LDFeedbackKind.js.map +1 -0
  71. package/dist/src/api/metrics/LDTokenUsage.d.ts.map +1 -0
  72. package/dist/src/api/metrics/LDTokenUsage.js.map +1 -0
  73. package/dist/src/api/metrics/OpenAiUsage.d.ts.map +1 -0
  74. package/dist/src/api/metrics/OpenAiUsage.js.map +1 -0
  75. package/dist/src/api/metrics/VercelAISDKTokenUsage.d.ts.map +1 -0
  76. package/dist/src/api/metrics/VercelAISDKTokenUsage.js.map +1 -0
  77. package/dist/src/api/metrics/index.d.ts.map +1 -0
  78. package/dist/src/api/metrics/index.js.map +1 -0
  79. package/dist/{api → src/api}/providers/AIProvider.d.ts +20 -3
  80. package/dist/src/api/providers/AIProvider.d.ts.map +1 -0
  81. package/dist/src/api/providers/AIProvider.js +88 -0
  82. package/dist/src/api/providers/AIProvider.js.map +1 -0
  83. package/dist/{api → src/api}/providers/AIProviderFactory.d.ts +2 -2
  84. package/dist/src/api/providers/AIProviderFactory.d.ts.map +1 -0
  85. package/dist/src/api/providers/AIProviderFactory.js.map +1 -0
  86. package/dist/src/api/providers/index.d.ts.map +1 -0
  87. package/dist/src/api/providers/index.js.map +1 -0
  88. package/dist/src/index.d.ts.map +1 -0
  89. package/dist/src/index.js.map +1 -0
  90. package/docs/assets/search.js +1 -1
  91. package/docs/classes/AIProvider.html +55 -20
  92. package/docs/classes/AIProviderFactory.html +27 -17
  93. package/docs/classes/Judge.html +322 -0
  94. package/docs/classes/TrackedChat.html +97 -29
  95. package/docs/enums/LDFeedbackKind.html +22 -12
  96. package/docs/functions/createBedrockTokenUsage.html +20 -10
  97. package/docs/functions/createOpenAiUsage.html +20 -10
  98. package/docs/functions/createVercelAISDKTokenUsage.html +20 -10
  99. package/docs/functions/initAi.html +20 -10
  100. package/docs/index.html +36 -16
  101. package/docs/interfaces/ChatResponse.html +35 -14
  102. package/docs/interfaces/EvalScore.html +119 -0
  103. package/docs/interfaces/JudgeResponse.html +129 -0
  104. package/docs/interfaces/LDAIAgentConfig.html +81 -33
  105. package/docs/interfaces/{LDAIAgent.html → LDAIAgentConfigDefault.html} +51 -41
  106. package/docs/interfaces/LDAIAgentRequestConfig.html +129 -0
  107. package/docs/interfaces/LDAIClient.html +234 -40
  108. package/docs/interfaces/{VercelAISDKConfig.html → LDAICompletionConfig.html} +86 -86
  109. package/docs/interfaces/LDAICompletionConfigDefault.html +155 -0
  110. package/docs/interfaces/LDAIConfig.html +45 -75
  111. package/docs/interfaces/LDAIConfigDefault.html +133 -0
  112. package/docs/interfaces/LDAIConfigTracker.html +140 -59
  113. package/docs/interfaces/LDAIJudgeConfig.html +167 -0
  114. package/docs/interfaces/LDAIJudgeConfigDefault.html +155 -0
  115. package/docs/interfaces/LDAIMetrics.html +22 -12
  116. package/docs/interfaces/LDJudge.html +119 -0
  117. package/docs/interfaces/{VercelAISDKMapOptions.html → LDJudgeConfiguration.html} +36 -19
  118. package/docs/interfaces/LDLogger.html +19 -9
  119. package/docs/interfaces/LDMessage.html +22 -12
  120. package/docs/interfaces/LDModelConfig.html +23 -13
  121. package/docs/interfaces/LDProviderConfig.html +21 -11
  122. package/docs/interfaces/LDTokenUsage.html +23 -13
  123. package/docs/interfaces/StructuredResponse.html +129 -0
  124. package/docs/types/{VercelAISDKProvider.html → LDAIConfigDefaultKind.html} +26 -31
  125. package/docs/types/{LDAIAgentDefaults.html → LDAIConfigKind.html} +24 -14
  126. package/docs/types/{LDAIDefaults.html → LDAIConfigMode.html} +24 -24
  127. package/docs/types/SupportedAIProvider.html +20 -10
  128. package/docs/variables/SUPPORTED_AI_PROVIDERS.html +20 -10
  129. package/package.json +3 -3
  130. package/src/LDAIClientImpl.ts +222 -176
  131. package/src/LDAIConfigTrackerImpl.ts +73 -54
  132. package/src/api/LDAIClient.ts +166 -33
  133. package/src/api/chat/TrackedChat.ts +64 -5
  134. package/src/api/chat/types.ts +8 -1
  135. package/src/api/config/LDAIConfigTracker.ts +46 -27
  136. package/src/api/config/LDAIConfigUtils.ts +201 -0
  137. package/src/api/config/index.ts +2 -2
  138. package/src/api/config/types.ts +256 -0
  139. package/src/api/index.ts +1 -1
  140. package/src/api/judge/EvaluationSchemaBuilder.ts +54 -0
  141. package/src/api/judge/Judge.ts +216 -0
  142. package/src/api/judge/index.ts +2 -0
  143. package/src/api/judge/types.ts +39 -0
  144. package/src/api/providers/AIProvider.ts +54 -3
  145. package/src/api/providers/AIProviderFactory.ts +4 -4
  146. package/tsconfig.json +3 -3
  147. package/tsconfig.ref.json +1 -1
  148. package/__tests__/LDAIConfigMapper.test.ts +0 -159
  149. package/dist/LDAIClientImpl.d.ts +0 -23
  150. package/dist/LDAIClientImpl.d.ts.map +0 -1
  151. package/dist/LDAIClientImpl.js +0 -128
  152. package/dist/LDAIClientImpl.js.map +0 -1
  153. package/dist/LDAIConfigMapper.d.ts +0 -10
  154. package/dist/LDAIConfigMapper.d.ts.map +0 -1
  155. package/dist/LDAIConfigMapper.js +0 -55
  156. package/dist/LDAIConfigMapper.js.map +0 -1
  157. package/dist/LDAIConfigTrackerImpl.d.ts.map +0 -1
  158. package/dist/LDAIConfigTrackerImpl.js.map +0 -1
  159. package/dist/LDClientMin.d.ts.map +0 -1
  160. package/dist/api/LDAIClient.d.ts +0 -169
  161. package/dist/api/LDAIClient.d.ts.map +0 -1
  162. package/dist/api/agents/LDAIAgent.d.ts +0 -32
  163. package/dist/api/agents/LDAIAgent.d.ts.map +0 -1
  164. package/dist/api/agents/LDAIAgent.js.map +0 -1
  165. package/dist/api/agents/index.d.ts +0 -2
  166. package/dist/api/agents/index.d.ts.map +0 -1
  167. package/dist/api/agents/index.js.map +0 -1
  168. package/dist/api/chat/TrackedChat.d.ts.map +0 -1
  169. package/dist/api/chat/TrackedChat.js.map +0 -1
  170. package/dist/api/chat/index.d.ts.map +0 -1
  171. package/dist/api/chat/index.js.map +0 -1
  172. package/dist/api/chat/types.d.ts +0 -16
  173. package/dist/api/chat/types.d.ts.map +0 -1
  174. package/dist/api/config/LDAIConfig.d.ts +0 -92
  175. package/dist/api/config/LDAIConfig.d.ts.map +0 -1
  176. package/dist/api/config/LDAIConfig.js.map +0 -1
  177. package/dist/api/config/LDAIConfigTracker.d.ts.map +0 -1
  178. package/dist/api/config/LDAIConfigTracker.js.map +0 -1
  179. package/dist/api/config/VercelAISDK.d.ts +0 -19
  180. package/dist/api/config/VercelAISDK.d.ts.map +0 -1
  181. package/dist/api/config/VercelAISDK.js +0 -3
  182. package/dist/api/config/VercelAISDK.js.map +0 -1
  183. package/dist/api/config/index.d.ts +0 -4
  184. package/dist/api/config/index.d.ts.map +0 -1
  185. package/dist/api/config/index.js +0 -19
  186. package/dist/api/config/index.js.map +0 -1
  187. package/dist/api/index.d.ts.map +0 -1
  188. package/dist/api/index.js.map +0 -1
  189. package/dist/api/metrics/BedrockTokenUsage.d.ts.map +0 -1
  190. package/dist/api/metrics/BedrockTokenUsage.js.map +0 -1
  191. package/dist/api/metrics/LDAIMetrics.d.ts.map +0 -1
  192. package/dist/api/metrics/LDAIMetrics.js.map +0 -1
  193. package/dist/api/metrics/LDFeedbackKind.d.ts.map +0 -1
  194. package/dist/api/metrics/LDFeedbackKind.js.map +0 -1
  195. package/dist/api/metrics/LDTokenUsage.d.ts.map +0 -1
  196. package/dist/api/metrics/LDTokenUsage.js.map +0 -1
  197. package/dist/api/metrics/OpenAiUsage.d.ts.map +0 -1
  198. package/dist/api/metrics/OpenAiUsage.js.map +0 -1
  199. package/dist/api/metrics/VercelAISDKTokenUsage.d.ts.map +0 -1
  200. package/dist/api/metrics/VercelAISDKTokenUsage.js.map +0 -1
  201. package/dist/api/metrics/index.d.ts.map +0 -1
  202. package/dist/api/metrics/index.js.map +0 -1
  203. package/dist/api/providers/AIProvider.d.ts.map +0 -1
  204. package/dist/api/providers/AIProvider.js +0 -31
  205. package/dist/api/providers/AIProvider.js.map +0 -1
  206. package/dist/api/providers/AIProviderFactory.d.ts.map +0 -1
  207. package/dist/api/providers/AIProviderFactory.js.map +0 -1
  208. package/dist/api/providers/index.d.ts.map +0 -1
  209. package/dist/api/providers/index.js.map +0 -1
  210. package/dist/index.d.ts.map +0 -1
  211. package/dist/index.js.map +0 -1
  212. package/src/LDAIConfigMapper.ts +0 -65
  213. package/src/api/agents/LDAIAgent.ts +0 -36
  214. package/src/api/agents/index.ts +0 -1
  215. package/src/api/config/LDAIConfig.ts +0 -101
  216. package/src/api/config/VercelAISDK.ts +0 -21
  217. /package/dist/{LDClientMin.d.ts → src/LDClientMin.d.ts} +0 -0
  218. /package/dist/{LDClientMin.js → src/LDClientMin.js} +0 -0
  219. /package/dist/{api → src/api}/LDAIClient.js +0 -0
  220. /package/dist/{api → src/api}/chat/index.d.ts +0 -0
  221. /package/dist/{api → src/api}/chat/index.js +0 -0
  222. /package/dist/{api → src/api}/chat/types.js +0 -0
  223. /package/dist/{api → src/api}/config/LDAIConfigTracker.js +0 -0
  224. /package/dist/{api → src/api}/metrics/BedrockTokenUsage.d.ts +0 -0
  225. /package/dist/{api → src/api}/metrics/BedrockTokenUsage.js +0 -0
  226. /package/dist/{api → src/api}/metrics/LDAIMetrics.d.ts +0 -0
  227. /package/dist/{api → src/api}/metrics/LDAIMetrics.js +0 -0
  228. /package/dist/{api → src/api}/metrics/LDFeedbackKind.d.ts +0 -0
  229. /package/dist/{api → src/api}/metrics/LDFeedbackKind.js +0 -0
  230. /package/dist/{api → src/api}/metrics/LDTokenUsage.d.ts +0 -0
  231. /package/dist/{api → src/api}/metrics/LDTokenUsage.js +0 -0
  232. /package/dist/{api → src/api}/metrics/OpenAiUsage.d.ts +0 -0
  233. /package/dist/{api → src/api}/metrics/OpenAiUsage.js +0 -0
  234. /package/dist/{api → src/api}/metrics/VercelAISDKTokenUsage.d.ts +0 -0
  235. /package/dist/{api → src/api}/metrics/VercelAISDKTokenUsage.js +0 -0
  236. /package/dist/{api → src/api}/metrics/index.d.ts +0 -0
  237. /package/dist/{api → src/api}/metrics/index.js +0 -0
  238. /package/dist/{api → src/api}/providers/AIProviderFactory.js +0 -0
  239. /package/dist/{api → src/api}/providers/index.d.ts +0 -0
  240. /package/dist/{api → src/api}/providers/index.js +0 -0
  241. /package/dist/{index.d.ts → src/index.d.ts} +0 -0
  242. /package/dist/{index.js → src/index.js} +0 -0
package/CHANGELOG.md CHANGED
@@ -1,5 +1,49 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.14.0](https://github.com/launchdarkly/js-core/compare/server-sdk-ai-v0.13.0...server-sdk-ai-v0.14.0) (2025-11-06)
4
+
5
+
6
+ ### ⚠ BREAKING CHANGES
7
+
8
+ * Removed deprecated Vercel methods ([#983](https://github.com/launchdarkly/js-core/issues/983))
9
+ * Add support for real time judge evals ([#969](https://github.com/launchdarkly/js-core/issues/969))
10
+ * AI Config defaults require the "enabled" attribute
11
+ * Renamed LDAIAgentConfig to LDAIAgentConfigRequest for clarity
12
+ * Renamed LDAIAgent to LDAIAgentConfig *note the previous use of this name
13
+ * Renamed LDAIAgentDefault to LDAIAgentConfigDefault for clarity
14
+ * Renamed LDAIDefaults to LDAICompletionConfigDefault for clarity
15
+
16
+ ### Features
17
+
18
+ * Add support for real time judge evals ([#969](https://github.com/launchdarkly/js-core/issues/969)) ([6ecd9ab](https://github.com/launchdarkly/js-core/commit/6ecd9ab4d97f6445adfd377709f14d7f3b420363))
19
+ * Added createJudge method ([6ecd9ab](https://github.com/launchdarkly/js-core/commit/6ecd9ab4d97f6445adfd377709f14d7f3b420363))
20
+ * Added judgeConfig method to AI SDK to retrieve an AI Judge Config ([6ecd9ab](https://github.com/launchdarkly/js-core/commit/6ecd9ab4d97f6445adfd377709f14d7f3b420363))
21
+ * Added trackEvalScores method to config tracker ([6ecd9ab](https://github.com/launchdarkly/js-core/commit/6ecd9ab4d97f6445adfd377709f14d7f3b420363))
22
+ * Chat will evaluate responses with configured judges ([6ecd9ab](https://github.com/launchdarkly/js-core/commit/6ecd9ab4d97f6445adfd377709f14d7f3b420363))
23
+ * Include AI SDK version in tracking information ([#985](https://github.com/launchdarkly/js-core/issues/985)) ([ef90564](https://github.com/launchdarkly/js-core/commit/ef90564ee1ed9411e77b836d2b5b8037ff671b07))
24
+ * Removed deprecated Vercel methods ([#983](https://github.com/launchdarkly/js-core/issues/983)) ([960a499](https://github.com/launchdarkly/js-core/commit/960a49927e795890e5093b0156ec6d721c3066fd))
25
+
26
+
27
+ ### Bug Fixes
28
+
29
+ * AI Config defaults require the "enabled" attribute ([6ecd9ab](https://github.com/launchdarkly/js-core/commit/6ecd9ab4d97f6445adfd377709f14d7f3b420363))
30
+ * Renamed LDAIAgent to LDAIAgentConfig *note the previous use of this name ([6ecd9ab](https://github.com/launchdarkly/js-core/commit/6ecd9ab4d97f6445adfd377709f14d7f3b420363))
31
+ * Renamed LDAIAgentConfig to LDAIAgentConfigRequest for clarity ([6ecd9ab](https://github.com/launchdarkly/js-core/commit/6ecd9ab4d97f6445adfd377709f14d7f3b420363))
32
+ * Renamed LDAIAgentDefault to LDAIAgentConfigDefault for clarity ([6ecd9ab](https://github.com/launchdarkly/js-core/commit/6ecd9ab4d97f6445adfd377709f14d7f3b420363))
33
+ * Renamed LDAIDefaults to LDAICompletionConfigDefault for clarity ([6ecd9ab](https://github.com/launchdarkly/js-core/commit/6ecd9ab4d97f6445adfd377709f14d7f3b420363))
34
+
35
+ ## [0.13.0](https://github.com/launchdarkly/js-core/compare/server-sdk-ai-v0.12.3...server-sdk-ai-v0.13.0) (2025-11-04)
36
+
37
+
38
+ ### Features
39
+
40
+ * Add support for trackStreamMetricsOf method ([#971](https://github.com/launchdarkly/js-core/issues/971)) ([e18979e](https://github.com/launchdarkly/js-core/commit/e18979e27f4542552762a30a390749541daa3749))
41
+
42
+
43
+ ### Bug Fixes
44
+
45
+ * Deprecated toVercelAISDK, trackVercelAISDKStreamTextMetrics, use `@launchdarkly/server-sdk-ai-vercel` package ([e18979e](https://github.com/launchdarkly/js-core/commit/e18979e27f4542552762a30a390749541daa3749))
46
+
3
47
  ## [0.12.3](https://github.com/launchdarkly/js-core/compare/server-sdk-ai-v0.12.2...server-sdk-ai-v0.12.3) (2025-10-24)
4
48
 
5
49
 
package/README.md CHANGED
@@ -97,7 +97,7 @@ if (aiConfig.enabled) {
97
97
 
98
98
  ```typescript
99
99
  // Use the same defaultConfig from the retrieval section above
100
- const chat = await aiClient.initChat(
100
+ const chat = await aiClient.createChat(
101
101
  'customer-support-chat',
102
102
  context,
103
103
  defaultConfig,
@@ -134,7 +134,7 @@ const llm = await LangChainProvider.createLangChainModel(aiConfig);
134
134
 
135
135
  // Use with tracking
136
136
  const response = await aiConfig.tracker.trackMetricsOf(
137
- (result) => LangChainProvider.createAIMetrics(result),
137
+ LangChainProvider.getAIMetricsFromResponse,
138
138
  () => llm.invoke(messages)
139
139
  );
140
140
 
package/__tests__/Judge.test.ts ADDED
@@ -0,0 +1,496 @@
1
+ import { LDLogger } from '@launchdarkly/js-server-sdk-common';
2
+
3
+ import { LDAIConfigTracker } from '../src/api/config/LDAIConfigTracker';
4
+ import { LDAIJudgeConfig, LDMessage } from '../src/api/config/types';
5
+ import { Judge } from '../src/api/judge/Judge';
6
+ import { StructuredResponse } from '../src/api/judge/types';
7
+ import { AIProvider } from '../src/api/providers/AIProvider';
8
+
9
+ describe('Judge', () => {
10
+ let mockProvider: jest.Mocked<AIProvider>;
11
+ let mockTracker: jest.Mocked<LDAIConfigTracker>;
12
+ let mockLogger: jest.Mocked<LDLogger>;
13
+ let judgeConfig: LDAIJudgeConfig;
14
+
15
+ const mockTrackData = {
16
+ variationKey: 'test-variation',
17
+ configKey: 'test-config',
18
+ version: 1,
19
+ };
20
+
21
+ beforeEach(() => {
22
+ // Mock the AIProvider - only mock what's actually used
23
+ mockProvider = {
24
+ invokeStructuredModel: jest.fn(),
25
+ } as any;
26
+
27
+ // Mock the LDAIConfigTracker - only mock what's actually used
28
+ mockTracker = {
29
+ trackMetricsOf: jest.fn(),
30
+ getTrackData: jest.fn().mockReturnValue(mockTrackData),
31
+ } as any;
32
+
33
+ // Mock the logger - only mock what's actually used
34
+ mockLogger = {
35
+ debug: jest.fn(),
36
+ warn: jest.fn(),
37
+ error: jest.fn(),
38
+ } as any;
39
+
40
+ // Create a basic judge config
41
+ judgeConfig = {
42
+ enabled: true,
43
+ messages: [
44
+ { role: 'system', content: 'You are a helpful judge that evaluates AI responses.' },
45
+ {
46
+ role: 'user',
47
+ content:
48
+ 'Evaluate and report scores for important metrics: Input: {{message_history}}, Output: {{response_to_evaluate}}',
49
+ },
50
+ ],
51
+ model: { name: 'gpt-4' },
52
+ provider: { name: 'openai' },
53
+ tracker: mockTracker,
54
+ evaluationMetricKeys: ['relevance', 'accuracy', 'helpfulness'],
55
+ };
56
+ });
57
+
58
+ describe('constructor', () => {
59
+ it('initializes with proper configuration', () => {
60
+ const judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger);
61
+
62
+ expect(judge).toBeDefined();
63
+ });
64
+ });
65
+
66
+ describe('evaluate', () => {
67
+ let judge: Judge;
68
+
69
+ beforeEach(() => {
70
+ judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger);
71
+ });
72
+
73
+ it('evaluates AI response successfully', async () => {
74
+ const mockStructuredResponse: StructuredResponse = {
75
+ data: {
76
+ evaluations: {
77
+ relevance: { score: 0.8, reasoning: 'The response is relevant to the question' },
78
+ accuracy: { score: 0.9, reasoning: 'The response is factually accurate' },
79
+ helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' },
80
+ },
81
+ },
82
+ rawResponse: JSON.stringify({
83
+ evaluations: {
84
+ relevance: { score: 0.8, reasoning: 'The response is relevant to the question' },
85
+ accuracy: { score: 0.9, reasoning: 'The response is factually accurate' },
86
+ helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' },
87
+ },
88
+ }),
89
+ metrics: {
90
+ success: true,
91
+ usage: {
92
+ total: 100,
93
+ input: 50,
94
+ output: 50,
95
+ },
96
+ },
97
+ };
98
+
99
+ mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func());
100
+ mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse);
101
+
102
+ const result = await judge.evaluate(
103
+ 'What is the capital of France?',
104
+ 'Paris is the capital of France.',
105
+ );
106
+
107
+ expect(result).toEqual({
108
+ evals: {
109
+ relevance: { score: 0.8, reasoning: 'The response is relevant to the question' },
110
+ accuracy: { score: 0.9, reasoning: 'The response is factually accurate' },
111
+ helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' },
112
+ },
113
+ success: true,
114
+ });
115
+
116
+ expect(mockProvider.invokeStructuredModel).toHaveBeenCalledWith(
117
+ expect.arrayContaining([
118
+ expect.objectContaining({
119
+ role: 'system',
120
+ content: 'You are a helpful judge that evaluates AI responses.',
121
+ }),
122
+ expect.objectContaining({
123
+ role: 'user',
124
+ content:
125
+ 'Evaluate and report scores for important metrics: Input: What is the capital of France?, Output: Paris is the capital of France.',
126
+ }),
127
+ ]),
128
+ expect.any(Object), // evaluation response structure
129
+ );
130
+ });
131
+
132
+ it('handles sampling rate correctly', async () => {
133
+ // Mock Math.random to return 0.3 (should be sampled with rate 0.5 since 0.3 <= 0.5)
134
+ const originalRandom = Math.random;
135
+ Math.random = jest.fn().mockReturnValue(0.3);
136
+
137
+ // Mock the structured response
138
+ const mockStructuredResponse: StructuredResponse = {
139
+ data: {
140
+ evaluations: {
141
+ relevance: { score: 0.8, reasoning: 'Good' },
142
+ accuracy: { score: 0.9, reasoning: 'Accurate' },
143
+ helpfulness: { score: 0.7, reasoning: 'Helpful' },
144
+ },
145
+ },
146
+ rawResponse: JSON.stringify({
147
+ evaluations: {
148
+ relevance: { score: 0.8, reasoning: 'Good' },
149
+ accuracy: { score: 0.9, reasoning: 'Accurate' },
150
+ helpfulness: { score: 0.7, reasoning: 'Helpful' },
151
+ },
152
+ }),
153
+ metrics: {
154
+ success: true,
155
+ usage: { total: 100, input: 50, output: 50 },
156
+ },
157
+ };
158
+
159
+ mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func());
160
+ mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse);
161
+
162
+ const result = await judge.evaluate('test input', 'test output', 0.5);
163
+
164
+ expect(result).toBeDefined();
165
+ expect(mockProvider.invokeStructuredModel).toHaveBeenCalled();
166
+
167
+ Math.random = originalRandom;
168
+ });
169
+
170
+ it('returns undefined when not sampled', async () => {
171
+ // Mock Math.random to return 0.8 (should not be sampled with rate 0.5 since 0.8 > 0.5)
172
+ const originalRandom = Math.random;
173
+ Math.random = jest.fn().mockReturnValue(0.8);
174
+
175
+ const result = await judge.evaluate('test input', 'test output', 0.5);
176
+
177
+ expect(result).toBeUndefined();
178
+ expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled();
179
+ expect(mockLogger.debug).toHaveBeenCalledWith(
180
+ 'Judge evaluation skipped due to sampling rate: 0.5',
181
+ );
182
+
183
+ Math.random = originalRandom;
184
+ });
185
+
186
+ it('returns undefined when evaluationMetricKeys is empty', async () => {
187
+ const configWithoutMetrics: LDAIJudgeConfig = {
188
+ ...judgeConfig,
189
+ evaluationMetricKeys: [],
190
+ };
191
+ const judgeWithoutMetrics = new Judge(
192
+ configWithoutMetrics,
193
+ mockTracker,
194
+ mockProvider,
195
+ mockLogger,
196
+ );
197
+
198
+ const result = await judgeWithoutMetrics.evaluate('test input', 'test output');
199
+
200
+ expect(result).toBeUndefined();
201
+ expect(mockLogger.warn).toHaveBeenCalledWith(
202
+ 'Judge configuration is missing required evaluationMetricKeys',
203
+ mockTrackData,
204
+ );
205
+ });
206
+
207
+ it('returns undefined when messages are missing', async () => {
208
+ const configWithoutMessages: LDAIJudgeConfig = {
209
+ ...judgeConfig,
210
+ messages: undefined,
211
+ };
212
+ const judgeWithoutMessages = new Judge(
213
+ configWithoutMessages,
214
+ mockTracker,
215
+ mockProvider,
216
+ mockLogger,
217
+ );
218
+
219
+ const result = await judgeWithoutMessages.evaluate('test input', 'test output');
220
+
221
+ expect(result).toBeUndefined();
222
+ expect(mockLogger.warn).toHaveBeenCalledWith(
223
+ 'Judge configuration must include messages',
224
+ mockTrackData,
225
+ );
226
+ });
227
+
228
+ it('returns partial evaluations when some metrics are missing', async () => {
229
+ const mockStructuredResponse: StructuredResponse = {
230
+ data: {
231
+ evaluations: {
232
+ relevance: { score: 0.8, reasoning: 'Good' },
233
+ // accuracy is missing
234
+ helpfulness: { score: 0.7, reasoning: 'Helpful' },
235
+ },
236
+ },
237
+ rawResponse: JSON.stringify({
238
+ evaluations: {
239
+ relevance: { score: 0.8, reasoning: 'Good' },
240
+ helpfulness: { score: 0.7, reasoning: 'Helpful' },
241
+ },
242
+ }),
243
+ metrics: {
244
+ success: true,
245
+ usage: { total: 100, input: 50, output: 50 },
246
+ },
247
+ };
248
+
249
+ mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func());
250
+ mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse);
251
+
252
+ const result = await judge.evaluate('test input', 'test output');
253
+
254
+ // When one metric is missing, it returns the partial evals it has with success: false
255
+ expect(result).toEqual({
256
+ evals: {
257
+ relevance: { score: 0.8, reasoning: 'Good' },
258
+ helpfulness: { score: 0.7, reasoning: 'Helpful' },
259
+ },
260
+ success: false,
261
+ });
262
+ });
263
+
264
+ it('returns empty evaluations when response structure is malformed', async () => {
265
+ const mockStructuredResponse: StructuredResponse = {
266
+ data: {
267
+ // Missing 'evaluations' wrapper - malformed structure
268
+ relevance: { score: 0.8, reasoning: 'Good' },
269
+ accuracy: { score: 0.9, reasoning: 'Accurate' },
270
+ helpfulness: { score: 0.7, reasoning: 'Helpful' },
271
+ },
272
+ rawResponse: JSON.stringify({
273
+ relevance: { score: 0.8, reasoning: 'Good' },
274
+ accuracy: { score: 0.9, reasoning: 'Accurate' },
275
+ helpfulness: { score: 0.7, reasoning: 'Helpful' },
276
+ }),
277
+ metrics: {
278
+ success: true,
279
+ usage: { total: 100, input: 50, output: 50 },
280
+ },
281
+ };
282
+
283
+ mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func());
284
+ mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse);
285
+
286
+ const result = await judge.evaluate('test input', 'test output');
287
+
288
+ // When the structure is completely wrong, returns empty evals with success: false
289
+ expect(result).toEqual({
290
+ evals: {},
291
+ success: false,
292
+ });
293
+ });
294
+
295
+ it('handles provider errors gracefully', async () => {
296
+ const error = new Error('Provider error');
297
+ mockTracker.trackMetricsOf.mockRejectedValue(error);
298
+
299
+ const result = await judge.evaluate('test input', 'test output');
300
+
301
+ expect(result).toEqual({
302
+ evals: {},
303
+ success: false,
304
+ error: 'Provider error',
305
+ });
306
+ expect(mockLogger.error).toHaveBeenCalledWith('Judge evaluation failed:', error);
307
+ });
308
+
309
+ it('handles non-Error exceptions', async () => {
310
+ mockTracker.trackMetricsOf.mockRejectedValue('String error');
311
+
312
+ const result = await judge.evaluate('test input', 'test output');
313
+
314
+ expect(result).toEqual({
315
+ evals: {},
316
+ success: false,
317
+ error: 'Unknown error',
318
+ });
319
+ });
320
+ });
321
+
322
+ describe('evaluateMessages', () => {
323
+ let judge: Judge;
324
+
325
+ beforeEach(() => {
326
+ judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger);
327
+ });
328
+
329
+ it('evaluates messages and response successfully', async () => {
330
+ const messages: LDMessage[] = [
331
+ { role: 'user', content: 'What is the capital of France?' },
332
+ { role: 'assistant', content: 'Paris is the capital of France.' },
333
+ ];
334
+ const response = {
335
+ message: { role: 'assistant' as const, content: 'Paris is the capital of France.' },
336
+ metrics: { success: true },
337
+ };
338
+
339
+ const mockStructuredResponse: StructuredResponse = {
340
+ data: {
341
+ evaluations: {
342
+ relevance: { score: 0.8, reasoning: 'The response is relevant to the question' },
343
+ accuracy: { score: 0.9, reasoning: 'The response is factually accurate' },
344
+ helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' },
345
+ },
346
+ },
347
+ rawResponse: JSON.stringify({
348
+ evaluations: {
349
+ relevance: { score: 0.8, reasoning: 'The response is relevant to the question' },
350
+ accuracy: { score: 0.9, reasoning: 'The response is factually accurate' },
351
+ helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' },
352
+ },
353
+ }),
354
+ metrics: {
355
+ success: true,
356
+ usage: { total: 100, input: 50, output: 50 },
357
+ },
358
+ };
359
+
360
+ mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func());
361
+ mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse);
362
+
363
+ const result = await judge.evaluateMessages(messages, response);
364
+
365
+ expect(result).toEqual({
366
+ evals: {
367
+ relevance: { score: 0.8, reasoning: 'The response is relevant to the question' },
368
+ accuracy: { score: 0.9, reasoning: 'The response is factually accurate' },
369
+ helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' },
370
+ },
371
+ success: true,
372
+ });
373
+
374
+ expect(mockProvider.invokeStructuredModel).toHaveBeenCalledWith(
375
+ expect.arrayContaining([
376
+ expect.objectContaining({
377
+ role: 'system',
378
+ content: 'You are a helpful judge that evaluates AI responses.',
379
+ }),
380
+ expect.objectContaining({
381
+ role: 'user',
382
+ content:
383
+ 'Evaluate and report scores for important metrics: Input: What is the capital of France?\r\nParis is the capital of France., Output: Paris is the capital of France.',
384
+ }),
385
+ ]),
386
+ expect.any(Object), // evaluation response structure
387
+ );
388
+ });
389
+
390
+ it('handles sampling rate correctly', async () => {
391
+ const messages: LDMessage[] = [{ role: 'user', content: 'test' }];
392
+ const response = {
393
+ message: { role: 'assistant' as const, content: 'test response' },
394
+ metrics: { success: true },
395
+ };
396
+
397
+ // Mock Math.random to return 0.8 (should not be sampled with rate 0.5 since 0.8 > 0.5)
398
+ const originalRandom = Math.random;
399
+ Math.random = jest.fn().mockReturnValue(0.8);
400
+
401
+ const result = await judge.evaluateMessages(messages, response, 0.5);
402
+
403
+ expect(result).toBeUndefined();
404
+ expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled();
405
+
406
+ Math.random = originalRandom;
407
+ });
408
+ });
409
+
410
+ describe('_constructEvaluationMessages', () => {
411
+ let judge: Judge;
412
+
413
+ beforeEach(() => {
414
+ judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger);
415
+ });
416
+
417
+ it('constructs evaluation messages correctly', () => {
418
+ // Access private method for testing
419
+ // eslint-disable-next-line no-underscore-dangle
420
+ const constructMessages = (judge as any)._constructEvaluationMessages.bind(judge);
421
+ const messages = constructMessages('test input', 'test output');
422
+
423
+ expect(messages).toHaveLength(2);
424
+ expect(messages[0]).toEqual({
425
+ role: 'system',
426
+ content: 'You are a helpful judge that evaluates AI responses.',
427
+ });
428
+ expect(messages[1]).toEqual({
429
+ role: 'user',
430
+ content:
431
+ 'Evaluate and report scores for important metrics: Input: test input, Output: test output',
432
+ });
433
+ });
434
+ });
435
+
436
+ describe('_parseEvaluationResponse', () => {
437
+ let judge: Judge;
438
+
439
+ beforeEach(() => {
440
+ judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger);
441
+ });
442
+
443
+ it('parses valid evaluation response correctly', () => {
444
+ // eslint-disable-next-line no-underscore-dangle
445
+ const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge);
446
+ const responseData = {
447
+ evaluations: {
448
+ relevance: { score: 0.8, reasoning: 'Good' },
449
+ accuracy: { score: 0.9, reasoning: 'Accurate' },
450
+ helpfulness: { score: 0.7, reasoning: 'Helpful' },
451
+ },
452
+ };
453
+
454
+ const result = parseResponse(responseData);
455
+
456
+ expect(result).toEqual({
457
+ relevance: { score: 0.8, reasoning: 'Good' },
458
+ accuracy: { score: 0.9, reasoning: 'Accurate' },
459
+ helpfulness: { score: 0.7, reasoning: 'Helpful' },
460
+ });
461
+ });
462
+
463
+ it('returns empty object for invalid response data', () => {
464
+ // eslint-disable-next-line no-underscore-dangle
465
+ const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge);
466
+ const responseData = {
467
+ relevance: { score: 0.8, reasoning: 'Good' },
468
+ // Missing evaluations wrapper - invalid structure
469
+ };
470
+
471
+ const result = parseResponse(responseData);
472
+
473
+ // Returns empty object when evaluations structure is missing
474
+ expect(result).toEqual({});
475
+ });
476
+
477
+ it('handles missing score or reasoning fields', () => {
478
+ // eslint-disable-next-line no-underscore-dangle
479
+ const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge);
480
+ const responseData = {
481
+ evaluations: {
482
+ relevance: { score: 0.8 }, // Missing reasoning
483
+ accuracy: { reasoning: 'Accurate' }, // Missing score
484
+ helpfulness: { score: 0.7, reasoning: 'Helpful' },
485
+ },
486
+ };
487
+
488
+ const result = parseResponse(responseData);
489
+
490
+ // Only helpfulness passes validation, relevance and accuracy are skipped
491
+ expect(result).toEqual({
492
+ helpfulness: { score: 0.7, reasoning: 'Helpful' },
493
+ });
494
+ });
495
+ });
496
+ });