npm - @launchdarkly/server-sdk-ai - Versions diffs - 0.14.0 → 0.15.0 - Mend

@launchdarkly/server-sdk-ai 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (197)

package/CHANGELOG.md +18 -0
package/dist/index.cjs +1117 -0
package/dist/index.cjs.map +1 -0
package/dist/index.d.cts +1022 -0
package/dist/index.d.ts +1022 -0
package/dist/index.js +1071 -0
package/dist/index.js.map +1 -0
package/package.json +22 -5
package/__tests__/Judge.test.ts +0 -496
package/__tests__/LDAIClientImpl.test.ts +0 -589
package/__tests__/LDAIConfigTrackerImpl.test.ts +0 -815
package/__tests__/TokenUsage.test.ts +0 -119
package/__tests__/TrackedChat.test.ts +0 -230
package/dist/package.json +0 -53
package/dist/src/LDAIClientImpl.d.ts +0 -39
package/dist/src/LDAIClientImpl.d.ts.map +0 -1
package/dist/src/LDAIClientImpl.js +0 -164
package/dist/src/LDAIClientImpl.js.map +0 -1
package/dist/src/LDAIConfigTrackerImpl.d.ts +0 -73
package/dist/src/LDAIConfigTrackerImpl.d.ts.map +0 -1
package/dist/src/LDAIConfigTrackerImpl.js +0 -203
package/dist/src/LDAIConfigTrackerImpl.js.map +0 -1
package/dist/src/LDClientMin.d.ts +0 -11
package/dist/src/LDClientMin.d.ts.map +0 -1
package/dist/src/LDClientMin.js +0 -3
package/dist/src/LDClientMin.js.map +0 -1
package/dist/src/api/LDAIClient.d.ts +0 -258
package/dist/src/api/LDAIClient.d.ts.map +0 -1
package/dist/src/api/LDAIClient.js +0 -3
package/dist/src/api/LDAIClient.js.map +0 -1
package/dist/src/api/chat/TrackedChat.d.ts +0 -72
package/dist/src/api/chat/TrackedChat.d.ts.map +0 -1
package/dist/src/api/chat/TrackedChat.js +0 -125
package/dist/src/api/chat/TrackedChat.js.map +0 -1
package/dist/src/api/chat/index.d.ts +0 -3
package/dist/src/api/chat/index.d.ts.map +0 -1
package/dist/src/api/chat/index.js +0 -19
package/dist/src/api/chat/index.js.map +0 -1
package/dist/src/api/chat/types.d.ts +0 -22
package/dist/src/api/chat/types.d.ts.map +0 -1
package/dist/src/api/chat/types.js +0 -3
package/dist/src/api/chat/types.js.map +0 -1
package/dist/src/api/config/LDAIConfigTracker.d.ts +0 -203
package/dist/src/api/config/LDAIConfigTracker.d.ts.map +0 -1
package/dist/src/api/config/LDAIConfigTracker.js +0 -3
package/dist/src/api/config/LDAIConfigTracker.js.map +0 -1
package/dist/src/api/config/LDAIConfigUtils.d.ts +0 -2
package/dist/src/api/config/LDAIConfigUtils.d.ts.map +0 -1
package/dist/src/api/config/LDAIConfigUtils.js +0 -141
package/dist/src/api/config/LDAIConfigUtils.js.map +0 -1
package/dist/src/api/config/index.d.ts +0 -3
package/dist/src/api/config/index.d.ts.map +0 -1
package/dist/src/api/config/index.js +0 -18
package/dist/src/api/config/index.js.map +0 -1
package/dist/src/api/config/types.d.ts +0 -202
package/dist/src/api/config/types.d.ts.map +0 -1
package/dist/src/api/config/types.js +0 -3
package/dist/src/api/config/types.js.map +0 -1
package/dist/src/api/index.d.ts +0 -7
package/dist/src/api/index.d.ts.map +0 -1
package/dist/src/api/index.js +0 -23
package/dist/src/api/index.js.map +0 -1
package/dist/src/api/judge/EvaluationSchemaBuilder.d.ts +0 -11
package/dist/src/api/judge/EvaluationSchemaBuilder.d.ts.map +0 -1
package/dist/src/api/judge/EvaluationSchemaBuilder.js +0 -52
package/dist/src/api/judge/EvaluationSchemaBuilder.js.map +0 -1
package/dist/src/api/judge/Judge.d.ts +0 -63
package/dist/src/api/judge/Judge.d.ts.map +0 -1
package/dist/src/api/judge/Judge.js +0 -149
package/dist/src/api/judge/Judge.js.map +0 -1
package/dist/src/api/judge/index.d.ts +0 -3
package/dist/src/api/judge/index.d.ts.map +0 -1
package/dist/src/api/judge/index.js +0 -6
package/dist/src/api/judge/index.js.map +0 -1
package/dist/src/api/judge/types.d.ts +0 -35
package/dist/src/api/judge/types.d.ts.map +0 -1
package/dist/src/api/judge/types.js +0 -3
package/dist/src/api/judge/types.js.map +0 -1
package/dist/src/api/metrics/BedrockTokenUsage.d.ts +0 -7
package/dist/src/api/metrics/BedrockTokenUsage.d.ts.map +0 -1
package/dist/src/api/metrics/BedrockTokenUsage.js +0 -12
package/dist/src/api/metrics/BedrockTokenUsage.js.map +0 -1
package/dist/src/api/metrics/LDAIMetrics.d.ts +0 -17
package/dist/src/api/metrics/LDAIMetrics.d.ts.map +0 -1
package/dist/src/api/metrics/LDAIMetrics.js +0 -3
package/dist/src/api/metrics/LDAIMetrics.js.map +0 -1
package/dist/src/api/metrics/LDFeedbackKind.d.ts +0 -14
package/dist/src/api/metrics/LDFeedbackKind.d.ts.map +0 -1
package/dist/src/api/metrics/LDFeedbackKind.js +0 -18
package/dist/src/api/metrics/LDFeedbackKind.js.map +0 -1
package/dist/src/api/metrics/LDTokenUsage.d.ts +0 -18
package/dist/src/api/metrics/LDTokenUsage.d.ts.map +0 -1
package/dist/src/api/metrics/LDTokenUsage.js +0 -3
package/dist/src/api/metrics/LDTokenUsage.js.map +0 -1
package/dist/src/api/metrics/OpenAiUsage.d.ts +0 -7
package/dist/src/api/metrics/OpenAiUsage.d.ts.map +0 -1
package/dist/src/api/metrics/OpenAiUsage.js +0 -13
package/dist/src/api/metrics/OpenAiUsage.js.map +0 -1
package/dist/src/api/metrics/VercelAISDKTokenUsage.d.ts +0 -9
package/dist/src/api/metrics/VercelAISDKTokenUsage.d.ts.map +0 -1
package/dist/src/api/metrics/VercelAISDKTokenUsage.js +0 -13
package/dist/src/api/metrics/VercelAISDKTokenUsage.js.map +0 -1
package/dist/src/api/metrics/index.d.ts +0 -7
package/dist/src/api/metrics/index.d.ts.map +0 -1
package/dist/src/api/metrics/index.js +0 -23
package/dist/src/api/metrics/index.js.map +0 -1
package/dist/src/api/providers/AIProvider.d.ts +0 -52
package/dist/src/api/providers/AIProvider.d.ts.map +0 -1
package/dist/src/api/providers/AIProvider.js +0 -88
package/dist/src/api/providers/AIProvider.js.map +0 -1
package/dist/src/api/providers/AIProviderFactory.d.ts +0 -39
package/dist/src/api/providers/AIProviderFactory.d.ts.map +0 -1
package/dist/src/api/providers/AIProviderFactory.js +0 -102
package/dist/src/api/providers/AIProviderFactory.js.map +0 -1
package/dist/src/api/providers/index.d.ts +0 -3
package/dist/src/api/providers/index.d.ts.map +0 -1
package/dist/src/api/providers/index.js +0 -19
package/dist/src/api/providers/index.js.map +0 -1
package/dist/src/index.d.ts +0 -19
package/dist/src/index.d.ts.map +0 -1
package/dist/src/index.js +0 -29
package/dist/src/index.js.map +0 -1
package/docs/.nojekyll +0 -1
package/docs/assets/highlight.css +0 -92
package/docs/assets/main.js +0 -58
package/docs/assets/search.js +0 -1
package/docs/assets/style.css +0 -1379
package/docs/classes/AIProvider.html +0 -210
package/docs/classes/AIProviderFactory.html +0 -208
package/docs/classes/Judge.html +0 -322
package/docs/classes/TrackedChat.html +0 -322
package/docs/enums/LDFeedbackKind.html +0 -115
package/docs/functions/createBedrockTokenUsage.html +0 -94
package/docs/functions/createOpenAiUsage.html +0 -94
package/docs/functions/createVercelAISDKTokenUsage.html +0 -98
package/docs/functions/initAi.html +0 -93
package/docs/index.html +0 -136
package/docs/interfaces/ChatResponse.html +0 -130
package/docs/interfaces/EvalScore.html +0 -119
package/docs/interfaces/JudgeResponse.html +0 -129
package/docs/interfaces/LDAIAgentConfig.html +0 -167
package/docs/interfaces/LDAIAgentConfigDefault.html +0 -155
package/docs/interfaces/LDAIAgentRequestConfig.html +0 -129
package/docs/interfaces/LDAIClient.html +0 -449
package/docs/interfaces/LDAICompletionConfig.html +0 -167
package/docs/interfaces/LDAICompletionConfigDefault.html +0 -155
package/docs/interfaces/LDAIConfig.html +0 -148
package/docs/interfaces/LDAIConfigDefault.html +0 -133
package/docs/interfaces/LDAIConfigTracker.html +0 -510
package/docs/interfaces/LDAIJudgeConfig.html +0 -167
package/docs/interfaces/LDAIJudgeConfigDefault.html +0 -155
package/docs/interfaces/LDAIMetrics.html +0 -121
package/docs/interfaces/LDJudge.html +0 -119
package/docs/interfaces/LDJudgeConfiguration.html +0 -109
package/docs/interfaces/LDLogger.html +0 -189
package/docs/interfaces/LDMessage.html +0 -119
package/docs/interfaces/LDModelConfig.html +0 -139
package/docs/interfaces/LDProviderConfig.html +0 -105
package/docs/interfaces/LDTokenUsage.html +0 -129
package/docs/interfaces/StructuredResponse.html +0 -129
package/docs/types/LDAIConfigDefaultKind.html +0 -81
package/docs/types/LDAIConfigKind.html +0 -81
package/docs/types/LDAIConfigMode.html +0 -81
package/docs/types/SupportedAIProvider.html +0 -81
package/docs/variables/SUPPORTED_AI_PROVIDERS.html +0 -81
package/jest.config.js +0 -7
package/src/LDAIClientImpl.ts +0 -327
package/src/LDAIConfigTrackerImpl.ts +0 -278
package/src/LDClientMin.ts +0 -18
package/src/api/LDAIClient.ts +0 -325
package/src/api/chat/TrackedChat.ts +0 -159
package/src/api/chat/index.ts +0 -2
package/src/api/chat/types.ts +0 -24
package/src/api/config/LDAIConfigTracker.ts +0 -231
package/src/api/config/LDAIConfigUtils.ts +0 -201
package/src/api/config/index.ts +0 -3
package/src/api/config/types.ts +0 -256
package/src/api/index.ts +0 -6
package/src/api/judge/EvaluationSchemaBuilder.ts +0 -54
package/src/api/judge/Judge.ts +0 -216
package/src/api/judge/index.ts +0 -2
package/src/api/judge/types.ts +0 -39
package/src/api/metrics/BedrockTokenUsage.ts +0 -13
package/src/api/metrics/LDAIMetrics.ts +0 -18
package/src/api/metrics/LDFeedbackKind.ts +0 -13
package/src/api/metrics/LDTokenUsage.ts +0 -19
package/src/api/metrics/OpenAiUsage.ts +0 -13
package/src/api/metrics/VercelAISDKTokenUsage.ts +0 -15
package/src/api/metrics/index.ts +0 -6
package/src/api/providers/AIProvider.ts +0 -94
package/src/api/providers/AIProviderFactory.ts +0 -152
package/src/api/providers/index.ts +0 -2
package/src/index.ts +0 -24
package/tsconfig.eslint.json +0 -5
package/tsconfig.json +0 -21
package/tsconfig.ref.json +0 -7
package/typedoc.json +0 -5

package/__tests__/Judge.test.ts DELETED

@@ -1,496 +0,0 @@
-import { LDLogger } from '@launchdarkly/js-server-sdk-common';
-import { LDAIConfigTracker } from '../src/api/config/LDAIConfigTracker';
-import { LDAIJudgeConfig, LDMessage } from '../src/api/config/types';
-import { Judge } from '../src/api/judge/Judge';
-import { StructuredResponse } from '../src/api/judge/types';
-import { AIProvider } from '../src/api/providers/AIProvider';
-describe('Judge', () => {
-  let mockProvider: jest.Mocked<AIProvider>;
-  let mockTracker: jest.Mocked<LDAIConfigTracker>;
-  let mockLogger: jest.Mocked<LDLogger>;
-  let judgeConfig: LDAIJudgeConfig;
-  const mockTrackData = {
-    variationKey: 'test-variation',
-    configKey: 'test-config',
-    version: 1,
-  };
-  beforeEach(() => {
-    // Mock the AIProvider - only mock what's actually used
-    mockProvider = {
-      invokeStructuredModel: jest.fn(),
-    } as any;
-    // Mock the LDAIConfigTracker - only mock what's actually used
-    mockTracker = {
-      trackMetricsOf: jest.fn(),
-      getTrackData: jest.fn().mockReturnValue(mockTrackData),
-    } as any;
-    // Mock the logger - only mock what's actually used
-    mockLogger = {
-      debug: jest.fn(),
-      warn: jest.fn(),
-      error: jest.fn(),
-    } as any;
-    // Create a basic judge config
-    judgeConfig = {
-      enabled: true,
-      messages: [
-        { role: 'system', content: 'You are a helpful judge that evaluates AI responses.' },
-        {
-          role: 'user',
-          content:
-            'Evaluate and report scores for important metrics: Input: {{message_history}}, Output: {{response_to_evaluate}}',
-        },
-      ],
-      model: { name: 'gpt-4' },
-      provider: { name: 'openai' },
-      tracker: mockTracker,
-      evaluationMetricKeys: ['relevance', 'accuracy', 'helpfulness'],
-    };
-  });
-  describe('constructor', () => {
-    it('initializes with proper configuration', () => {
-      const judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger);
-      expect(judge).toBeDefined();
-    });
-  });
-  describe('evaluate', () => {
-    let judge: Judge;
-    beforeEach(() => {
-      judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger);
-    });
-    it('evaluates AI response successfully', async () => {
-      const mockStructuredResponse: StructuredResponse = {
-        data: {
-          evaluations: {
-            relevance: { score: 0.8, reasoning: 'The response is relevant to the question' },
-            accuracy: { score: 0.9, reasoning: 'The response is factually accurate' },
-            helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' },
-          },
-        },
-        rawResponse: JSON.stringify({
-          evaluations: {
-            relevance: { score: 0.8, reasoning: 'The response is relevant to the question' },
-            accuracy: { score: 0.9, reasoning: 'The response is factually accurate' },
-            helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' },
-          },
-        }),
-        metrics: {
-          success: true,
-          usage: {
-            total: 100,
-            input: 50,
-            output: 50,
-          },
-        },
-      };
-      mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func());
-      mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse);
-      const result = await judge.evaluate(
-        'What is the capital of France?',
-        'Paris is the capital of France.',
-      );
-      expect(result).toEqual({
-        evals: {
-          relevance: { score: 0.8, reasoning: 'The response is relevant to the question' },
-          accuracy: { score: 0.9, reasoning: 'The response is factually accurate' },
-          helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' },
-        },
-        success: true,
-      });
-      expect(mockProvider.invokeStructuredModel).toHaveBeenCalledWith(
-        expect.arrayContaining([
-          expect.objectContaining({
-            role: 'system',
-            content: 'You are a helpful judge that evaluates AI responses.',
-          }),
-          expect.objectContaining({
-            role: 'user',
-            content:
-              'Evaluate and report scores for important metrics: Input: What is the capital of France?, Output: Paris is the capital of France.',
-          }),
-        ]),
-        expect.any(Object), // evaluation response structure
-      );
-    });
-    it('handles sampling rate correctly', async () => {
-      // Mock Math.random to return 0.3 (should be sampled with rate 0.5 since 0.3 <= 0.5)
-      const originalRandom = Math.random;
-      Math.random = jest.fn().mockReturnValue(0.3);
-      // Mock the structured response
-      const mockStructuredResponse: StructuredResponse = {
-        data: {
-          evaluations: {
-            relevance: { score: 0.8, reasoning: 'Good' },
-            accuracy: { score: 0.9, reasoning: 'Accurate' },
-            helpfulness: { score: 0.7, reasoning: 'Helpful' },
-          },
-        },
-        rawResponse: JSON.stringify({
-          evaluations: {
-            relevance: { score: 0.8, reasoning: 'Good' },
-            accuracy: { score: 0.9, reasoning: 'Accurate' },
-            helpfulness: { score: 0.7, reasoning: 'Helpful' },
-          },
-        }),
-        metrics: {
-          success: true,
-          usage: { total: 100, input: 50, output: 50 },
-        },
-      };
-      mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func());
-      mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse);
-      const result = await judge.evaluate('test input', 'test output', 0.5);
-      expect(result).toBeDefined();
-      expect(mockProvider.invokeStructuredModel).toHaveBeenCalled();
-      Math.random = originalRandom;
-    });
-    it('returns undefined when not sampled', async () => {
-      // Mock Math.random to return 0.8 (should not be sampled with rate 0.5 since 0.8 > 0.5)
-      const originalRandom = Math.random;
-      Math.random = jest.fn().mockReturnValue(0.8);
-      const result = await judge.evaluate('test input', 'test output', 0.5);
-      expect(result).toBeUndefined();
-      expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled();
-      expect(mockLogger.debug).toHaveBeenCalledWith(
-        'Judge evaluation skipped due to sampling rate: 0.5',
-      );
-      Math.random = originalRandom;
-    });
-    it('returns undefined when evaluationMetricKeys is empty', async () => {
-      const configWithoutMetrics: LDAIJudgeConfig = {
-        ...judgeConfig,
-        evaluationMetricKeys: [],
-      };
-      const judgeWithoutMetrics = new Judge(
-        configWithoutMetrics,
-        mockTracker,
-        mockProvider,
-        mockLogger,
-      );
-      const result = await judgeWithoutMetrics.evaluate('test input', 'test output');
-      expect(result).toBeUndefined();
-      expect(mockLogger.warn).toHaveBeenCalledWith(
-        'Judge configuration is missing required evaluationMetricKeys',
-        mockTrackData,
-      );
-    });
-    it('returns undefined when messages are missing', async () => {
-      const configWithoutMessages: LDAIJudgeConfig = {
-        ...judgeConfig,
-        messages: undefined,
-      };
-      const judgeWithoutMessages = new Judge(
-        configWithoutMessages,
-        mockTracker,
-        mockProvider,
-        mockLogger,
-      );
-      const result = await judgeWithoutMessages.evaluate('test input', 'test output');
-      expect(result).toBeUndefined();
-      expect(mockLogger.warn).toHaveBeenCalledWith(
-        'Judge configuration must include messages',
-        mockTrackData,
-      );
-    });
-    it('returns partial evaluations when some metrics are missing', async () => {
-      const mockStructuredResponse: StructuredResponse = {
-        data: {
-          evaluations: {
-            relevance: { score: 0.8, reasoning: 'Good' },
-            // accuracy is missing
-            helpfulness: { score: 0.7, reasoning: 'Helpful' },
-          },
-        },
-        rawResponse: JSON.stringify({
-          evaluations: {
-            relevance: { score: 0.8, reasoning: 'Good' },
-            helpfulness: { score: 0.7, reasoning: 'Helpful' },
-          },
-        }),
-        metrics: {
-          success: true,
-          usage: { total: 100, input: 50, output: 50 },
-        },
-      };
-      mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func());
-      mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse);
-      const result = await judge.evaluate('test input', 'test output');
-      // When one metric is missing, it returns the partial evals it has with success: false
-      expect(result).toEqual({
-        evals: {
-          relevance: { score: 0.8, reasoning: 'Good' },
-          helpfulness: { score: 0.7, reasoning: 'Helpful' },
-        },
-        success: false,
-      });
-    });
-    it('returns empty evaluations when response structure is malformed', async () => {
-      const mockStructuredResponse: StructuredResponse = {
-        data: {
-          // Missing 'evaluations' wrapper - malformed structure
-          relevance: { score: 0.8, reasoning: 'Good' },
-          accuracy: { score: 0.9, reasoning: 'Accurate' },
-          helpfulness: { score: 0.7, reasoning: 'Helpful' },
-        },
-        rawResponse: JSON.stringify({
-          relevance: { score: 0.8, reasoning: 'Good' },
-          accuracy: { score: 0.9, reasoning: 'Accurate' },
-          helpfulness: { score: 0.7, reasoning: 'Helpful' },
-        }),
-        metrics: {
-          success: true,
-          usage: { total: 100, input: 50, output: 50 },
-        },
-      };
-      mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func());
-      mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse);
-      const result = await judge.evaluate('test input', 'test output');
-      // When the structure is completely wrong, returns empty evals with success: false
-      expect(result).toEqual({
-        evals: {},
-        success: false,
-      });
-    });
-    it('handles provider errors gracefully', async () => {
-      const error = new Error('Provider error');
-      mockTracker.trackMetricsOf.mockRejectedValue(error);
-      const result = await judge.evaluate('test input', 'test output');
-      expect(result).toEqual({
-        evals: {},
-        success: false,
-        error: 'Provider error',
-      });
-      expect(mockLogger.error).toHaveBeenCalledWith('Judge evaluation failed:', error);
-    });
-    it('handles non-Error exceptions', async () => {
-      mockTracker.trackMetricsOf.mockRejectedValue('String error');
-      const result = await judge.evaluate('test input', 'test output');
-      expect(result).toEqual({
-        evals: {},
-        success: false,
-        error: 'Unknown error',
-      });
-    });
-  });
-  describe('evaluateMessages', () => {
-    let judge: Judge;
-    beforeEach(() => {
-      judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger);
-    });
-    it('evaluates messages and response successfully', async () => {
-      const messages: LDMessage[] = [
-        { role: 'user', content: 'What is the capital of France?' },
-        { role: 'assistant', content: 'Paris is the capital of France.' },
-      ];
-      const response = {
-        message: { role: 'assistant' as const, content: 'Paris is the capital of France.' },
-        metrics: { success: true },
-      };
-      const mockStructuredResponse: StructuredResponse = {
-        data: {
-          evaluations: {
-            relevance: { score: 0.8, reasoning: 'The response is relevant to the question' },
-            accuracy: { score: 0.9, reasoning: 'The response is factually accurate' },
-            helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' },
-          },
-        },
-        rawResponse: JSON.stringify({
-          evaluations: {
-            relevance: { score: 0.8, reasoning: 'The response is relevant to the question' },
-            accuracy: { score: 0.9, reasoning: 'The response is factually accurate' },
-            helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' },
-          },
-        }),
-        metrics: {
-          success: true,
-          usage: { total: 100, input: 50, output: 50 },
-        },
-      };
-      mockTracker.trackMetricsOf.mockImplementation(async (extractor, func) => func());
-      mockProvider.invokeStructuredModel.mockResolvedValue(mockStructuredResponse);
-      const result = await judge.evaluateMessages(messages, response);
-      expect(result).toEqual({
-        evals: {
-          relevance: { score: 0.8, reasoning: 'The response is relevant to the question' },
-          accuracy: { score: 0.9, reasoning: 'The response is factually accurate' },
-          helpfulness: { score: 0.7, reasoning: 'The response provides helpful information' },
-        },
-        success: true,
-      });
-      expect(mockProvider.invokeStructuredModel).toHaveBeenCalledWith(
-        expect.arrayContaining([
-          expect.objectContaining({
-            role: 'system',
-            content: 'You are a helpful judge that evaluates AI responses.',
-          }),
-          expect.objectContaining({
-            role: 'user',
-            content:
-              'Evaluate and report scores for important metrics: Input: What is the capital of France?\r\nParis is the capital of France., Output: Paris is the capital of France.',
-          }),
-        ]),
-        expect.any(Object), // evaluation response structure
-      );
-    });
-    it('handles sampling rate correctly', async () => {
-      const messages: LDMessage[] = [{ role: 'user', content: 'test' }];
-      const response = {
-        message: { role: 'assistant' as const, content: 'test response' },
-        metrics: { success: true },
-      };
-      // Mock Math.random to return 0.8 (should not be sampled with rate 0.5 since 0.8 > 0.5)
-      const originalRandom = Math.random;
-      Math.random = jest.fn().mockReturnValue(0.8);
-      const result = await judge.evaluateMessages(messages, response, 0.5);
-      expect(result).toBeUndefined();
-      expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled();
-      Math.random = originalRandom;
-    });
-  });
-  describe('_constructEvaluationMessages', () => {
-    let judge: Judge;
-    beforeEach(() => {
-      judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger);
-    });
-    it('constructs evaluation messages correctly', () => {
-      // Access private method for testing
-      // eslint-disable-next-line no-underscore-dangle
-      const constructMessages = (judge as any)._constructEvaluationMessages.bind(judge);
-      const messages = constructMessages('test input', 'test output');
-      expect(messages).toHaveLength(2);
-      expect(messages[0]).toEqual({
-        role: 'system',
-        content: 'You are a helpful judge that evaluates AI responses.',
-      });
-      expect(messages[1]).toEqual({
-        role: 'user',
-        content:
-          'Evaluate and report scores for important metrics: Input: test input, Output: test output',
-      });
-    });
-  });
-  describe('_parseEvaluationResponse', () => {
-    let judge: Judge;
-    beforeEach(() => {
-      judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger);
-    });
-    it('parses valid evaluation response correctly', () => {
-      // eslint-disable-next-line no-underscore-dangle
-      const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge);
-      const responseData = {
-        evaluations: {
-          relevance: { score: 0.8, reasoning: 'Good' },
-          accuracy: { score: 0.9, reasoning: 'Accurate' },
-          helpfulness: { score: 0.7, reasoning: 'Helpful' },
-        },
-      };
-      const result = parseResponse(responseData);
-      expect(result).toEqual({
-        relevance: { score: 0.8, reasoning: 'Good' },
-        accuracy: { score: 0.9, reasoning: 'Accurate' },
-        helpfulness: { score: 0.7, reasoning: 'Helpful' },
-      });
-    });
-    it('returns empty object for invalid response data', () => {
-      // eslint-disable-next-line no-underscore-dangle
-      const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge);
-      const responseData = {
-        relevance: { score: 0.8, reasoning: 'Good' },
-        // Missing evaluations wrapper - invalid structure
-      };
-      const result = parseResponse(responseData);
-      // Returns empty object when evaluations structure is missing
-      expect(result).toEqual({});
-    });
-    it('handles missing score or reasoning fields', () => {
-      // eslint-disable-next-line no-underscore-dangle
-      const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge);
-      const responseData = {
-        evaluations: {
-          relevance: { score: 0.8 }, // Missing reasoning
-          accuracy: { reasoning: 'Accurate' }, // Missing score
-          helpfulness: { score: 0.7, reasoning: 'Helpful' },
-        },
-      };
-      const result = parseResponse(responseData);
-      // Only helpfulness passes validation, relevance and accuracy are skipped
-      expect(result).toEqual({
-        helpfulness: { score: 0.7, reasoning: 'Helpful' },
-      });
-    });
-  });
-});