@learning-commons/evaluators 0.4.0 → 0.6.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,5 @@
1
+ import { z } from 'zod';
2
+
1
3
  /**
2
4
  * Logging interface for the Evaluators SDK
3
5
  *
@@ -77,6 +79,82 @@ interface Logger {
77
79
  error(message: string, context?: LogContext): void;
78
80
  }
79
81
 
82
+ /**
83
+ * Message format for LLM conversations
84
+ */
85
+ interface Message {
86
+ role: 'system' | 'user' | 'assistant';
87
+ content: string;
88
+ }
89
+ /**
90
+ * Request configuration for structured LLM generation
91
+ */
92
+ interface LLMRequest<T> {
93
+ messages: Message[];
94
+ schema: z.ZodSchema<T>;
95
+ temperature?: number;
96
+ maxTokens?: number;
97
+ }
98
+ /**
99
+ * Response from LLM with usage metadata
100
+ */
101
+ interface LLMResponse<T> {
102
+ data: T;
103
+ model: string;
104
+ usage: {
105
+ inputTokens: number;
106
+ outputTokens: number;
107
+ };
108
+ latencyMs: number;
109
+ }
110
+ /**
111
+ * Response from plain text generation
112
+ */
113
+ interface TextGenerationResponse {
114
+ text: string;
115
+ usage: {
116
+ inputTokens: number;
117
+ outputTokens: number;
118
+ };
119
+ latencyMs: number;
120
+ }
121
+ /**
122
+ * Base interface for LLM provider implementations
123
+ */
124
+ interface LLMProvider {
125
+ /** Canonical label for the provider and model in use (e.g. "openai:gpt-4o") */
126
+ readonly label: string;
127
+ /**
128
+ * Generate structured output from LLM using Zod schema
129
+ */
130
+ generateStructured<T>(request: LLMRequest<T>): Promise<LLMResponse<T>>;
131
+ /**
132
+ * Generate plain text from LLM
133
+ */
134
+ generateText(messages: Message[], temperature?: number): Promise<TextGenerationResponse>;
135
+ }
136
+ /**
137
+ * Named constants for LLM provider types — use instead of raw string literals.
138
+ */
139
+ declare const Providers: {
140
+ readonly google: "google";
141
+ readonly openai: "openai";
142
+ readonly anthropic: "anthropic";
143
+ readonly custom: "custom";
144
+ };
145
+ /**
146
+ * Configuration for LLM provider
147
+ */
148
+ interface ProviderConfig {
149
+ type: 'openai' | 'anthropic' | 'google' | 'custom';
150
+ apiKey?: string;
151
+ model?: string;
152
+ temperature?: number;
153
+ baseURL?: string;
154
+ customProvider?: LLMProvider;
155
+ maxRetries?: number;
156
+ }
157
+
80
158
  /**
81
159
  * Evaluation status
82
160
  */
@@ -130,6 +208,7 @@ interface TelemetryEvent {
130
208
  provider: string;
131
209
  token_usage?: TokenUsage;
132
210
  metadata?: TelemetryMetadata;
211
+ model_override?: boolean;
133
212
  input_text?: string;
134
213
  }
135
214
  /**
@@ -166,6 +245,14 @@ declare class TelemetryClient {
166
245
  send(event: TelemetryEvent): Promise<void>;
167
246
  }
168
247
 
248
+ /**
249
+ * Supported LLM providers
250
+ */
251
+ declare enum Provider {
252
+ OpenAI = "openai",
253
+ Google = "google",
254
+ Anthropic = "anthropic"
255
+ }
169
256
  /**
170
257
  * Granular telemetry configuration options
171
258
  */
@@ -175,6 +262,24 @@ interface TelemetryOptions {
175
262
  /** Record input text in telemetry (default: false) */
176
263
  recordInputs?: boolean;
177
264
  }
265
+ /**
266
+ * Override the provider and model used by an evaluator.
267
+ *
268
+ * When set, all LLM calls use this provider and model instead of the defaults.
269
+ * The evaluator's normal key requirements are bypassed — provide the key for
270
+ * the chosen provider via the matching top-level config field
271
+ * (e.g. `anthropicApiKey` for `Provider.Anthropic`).
272
+ *
273
+ * Both `provider` and `model` are required. An empty or missing `model` throws
274
+ * `ConfigurationError` at construction time. An unrecognised model ID throws
275
+ * `ConfigurationError` at evaluation time when the provider rejects it.
276
+ *
277
+ * Results may vary; evaluators are validated against their recommended models.
278
+ */
279
+ interface ModelOverride {
280
+ provider: Provider;
281
+ model: string;
282
+ }
178
283
  /**
179
284
  * Base configuration for all evaluators
180
285
  */
@@ -183,8 +288,16 @@ interface BaseEvaluatorConfig {
183
288
  googleApiKey?: string;
184
289
  /** OpenAI API key (for evaluators using GPT) */
185
290
  openaiApiKey?: string;
291
+ /** Anthropic API key (for evaluators using Claude) */
292
+ anthropicApiKey?: string;
186
293
  /** Learning Commons partner key for authenticated telemetry (optional) */
187
294
  partnerKey?: string;
295
+ /**
296
+ * Override the provider and model used by this evaluator.
297
+ * When set, all LLM calls use this provider and model instead of the defaults.
298
+ * See {@link ModelOverride} for details.
299
+ */
300
+ modelOverride?: ModelOverride;
188
301
  /**
189
302
  * Maximum number of retries for failed API calls (default: 2)
190
303
  * Set to 0 to disable retries.
@@ -232,10 +345,8 @@ interface EvaluatorMetadata {
232
345
  readonly description: string;
233
346
  /** Supported grade levels (e.g., ['3', '4', '5', ...]) */
234
347
  readonly supportedGrades: readonly string[];
235
- /** Whether this evaluator requires a Google API key */
236
- readonly requiresGoogleKey: boolean;
237
- /** Whether this evaluator requires an OpenAI API key */
238
- readonly requiresOpenAIKey: boolean;
348
+ /** Providers required by this evaluator's default configuration */
349
+ readonly defaultProviders: readonly Provider[];
239
350
  }
240
351
  /**
241
352
  * Abstract base class for all evaluators
@@ -254,6 +365,10 @@ declare abstract class BaseEvaluator {
254
365
  protected logger: Logger;
255
366
  protected config: Required<Pick<BaseEvaluatorConfig, 'maxRetries'>> & {
256
367
  telemetry: Required<TelemetryOptions>;
368
+ modelOverride?: ModelOverride;
369
+ googleApiKey?: string;
370
+ openaiApiKey?: string;
371
+ anthropicApiKey?: string;
257
372
  };
258
373
  /**
259
374
  * Static metadata for the evaluator
@@ -268,13 +383,17 @@ declare abstract class BaseEvaluator {
268
383
  * name: 'My Evaluator',
269
384
  * description: 'Does something useful',
270
385
  * supportedGrades: ['3', '4', '5'],
271
- * requiresGoogleKey: true,
272
- * requiresOpenAIKey: false,
386
+ * defaultProviders: [Provider.Google],
273
387
  * };
274
388
  * }
275
389
  * ```
276
390
  */
277
391
  static readonly metadata: EvaluatorMetadata;
392
+ /**
393
+ * @throws {ConfigurationError} If the subclass has not defined static metadata
394
+ * @throws {ConfigurationError} If modelOverride has an invalid provider or empty model
395
+ * @throws {ConfigurationError} If a required API key is missing
396
+ */
278
397
  constructor(config: BaseEvaluatorConfig);
279
398
  /**
280
399
  * Get metadata for this evaluator instance
@@ -282,8 +401,16 @@ declare abstract class BaseEvaluator {
282
401
  */
283
402
  protected get metadata(): EvaluatorMetadata;
284
403
  /**
285
- * Validate that required API keys are provided based on metadata
286
- * @throws {ConfigurationError} If required API keys are missing
404
+ * Validate modelOverride shape: provider must be a known Provider value and
405
+ * model must be a non-empty string.
406
+ * @throws {ConfigurationError} If the override is malformed
407
+ */
408
+ private validateModelOverride;
409
+ /**
410
+ * Validate that the required API key is present.
411
+ * When modelOverride is set, checks the override provider's key.
412
+ * Otherwise checks the keys required by the evaluator's default providers.
413
+ * @throws {ConfigurationError} If a required key is missing
287
414
  */
288
415
  private validateApiKeys;
289
416
  /**
@@ -311,6 +438,12 @@ declare abstract class BaseEvaluator {
311
438
  * @throws {ValidationError} If grade is invalid
312
439
  */
313
440
  protected validateGrade(grade: string, validGrades: Set<string>): void;
441
+ /**
442
+ * Create an LLM provider, honouring modelOverride if set.
443
+ * When override is active, the key for the override provider is resolved
444
+ * from the matching top-level config field (e.g. anthropicApiKey for Anthropic).
445
+ */
446
+ protected createConfiguredProvider(defaultType: Provider, defaultModel: string, defaultApiKey: string | undefined): LLMProvider;
314
447
  /**
315
448
  * Send telemetry event to analytics service
316
449
  * Common helper for all evaluators
@@ -328,4 +461,4 @@ declare abstract class BaseEvaluator {
328
461
  }): Promise<void>;
329
462
  }
330
463
 
331
- export { BaseEvaluator as B, type EvaluatorMetadata as E, type Logger as L, type TelemetryOptions as T, type BaseEvaluatorConfig as a, type LogContext as b, LogLevel as c };
464
+ export { BaseEvaluator as B, type EvaluatorMetadata as E, type LLMProvider as L, type Message as M, Provider as P, type TelemetryOptions as T, type BaseEvaluatorConfig as a, type LLMRequest as b, type LLMResponse as c, type LogContext as d, LogLevel as e, type Logger as f, type ModelOverride as g, type ProviderConfig as h, Providers as i, type TextGenerationResponse as j };