@learning-commons/evaluators 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@ import { generateText, Output } from 'ai';
9
9
  import { z } from 'zod';
10
10
  import nlp from 'compromise';
11
11
  import { syllable } from 'syllable';
12
+ import textReadability from 'text-readability';
12
13
  import { parse } from 'csv-parse/sync';
13
14
 
14
15
  // src/batch/evaluator.ts
@@ -172,8 +173,9 @@ var TimeoutError = class extends APIError {
172
173
  function parseProviderError(error) {
173
174
  if (error instanceof Error) {
174
175
  const message = error.message;
176
+ const err = error;
175
177
  const statusMatch = message.match(/\b(4\d{2}|5\d{2})\b/);
176
- const statusCode = statusMatch ? parseInt(statusMatch[1]) : void 0;
178
+ const statusCode = err.statusCode ?? err.status ?? (statusMatch ? parseInt(statusMatch[1]) : void 0);
177
179
  return {
178
180
  message,
179
181
  statusCode,
@@ -186,6 +188,11 @@ function parseProviderError(error) {
186
188
  }
187
189
  function wrapProviderError(error, defaultMessage = "API request failed") {
188
190
  const { message, statusCode, code } = parseProviderError(error);
191
+ if (statusCode === 404 || statusCode === 400 && /\bmodel\b.*(not found|does not exist|invalid)/i.test(message)) {
192
+ return new ConfigurationError(
193
+ `Model not found or invalid: ${message}. Check the model ID passed to the provider.`
194
+ );
195
+ }
189
196
  if (statusCode === 401 || statusCode === 403) {
190
197
  return new AuthenticationError(
191
198
  message.includes("API key") ? message : "Invalid API key",
@@ -260,6 +267,111 @@ function createLogger(customLogger, level = 2 /* WARN */) {
260
267
  }
261
268
  return new ConsoleLogger(level);
262
269
  }
270
+ var VercelAIProvider = class {
271
+ constructor(config) {
272
+ this.config = config;
273
+ if (config.type === "custom") {
274
+ throw new Error(
275
+ "VercelAIProvider does not support custom type. Use config.customProvider directly."
276
+ );
277
+ }
278
+ if (!config.model || config.model.trim() === "") {
279
+ throw new Error(
280
+ `model is required for VercelAIProvider (type: "${config.type}"). No default is assumed.`
281
+ );
282
+ }
283
+ this.model = config.model;
284
+ this.label = `${config.type}:${config.model}`;
285
+ }
286
+ label;
287
+ model;
288
+ /**
289
+ * Generate structured output using Vercel AI SDK's generateText with output
290
+ */
291
+ async generateStructured(request) {
292
+ const model = await this.getModel();
293
+ const startTime = Date.now();
294
+ const { output, usage } = await generateText({
295
+ model,
296
+ messages: request.messages,
297
+ output: Output.object({ schema: request.schema }),
298
+ temperature: request.temperature ?? 0,
299
+ maxRetries: this.config.maxRetries ?? 0,
300
+ ...request.maxTokens !== void 0 ? { maxTokens: request.maxTokens } : {}
301
+ });
302
+ return {
303
+ data: output,
304
+ model: this.model,
305
+ usage: {
306
+ inputTokens: usage.inputTokens || 0,
307
+ outputTokens: usage.outputTokens || 0
308
+ },
309
+ latencyMs: Date.now() - startTime
310
+ };
311
+ }
312
+ /**
313
+ * Generate plain text using Vercel AI SDK's generateText
314
+ */
315
+ async generateText(messages, temperature) {
316
+ const model = await this.getModel();
317
+ const startTime = Date.now();
318
+ const { text, usage } = await generateText({
319
+ model,
320
+ messages,
321
+ temperature: temperature ?? this.config.temperature ?? 0,
322
+ maxRetries: this.config.maxRetries ?? 0
323
+ });
324
+ return {
325
+ text,
326
+ usage: {
327
+ inputTokens: usage.inputTokens || 0,
328
+ outputTokens: usage.outputTokens || 0
329
+ },
330
+ latencyMs: Date.now() - startTime
331
+ };
332
+ }
333
+ /**
334
+ * Get the configured language model.
335
+ * Uses dynamic imports so consumers only need to install the provider packages they use.
336
+ */
337
+ async getModel() {
338
+ const apiKey = this.config.apiKey;
339
+ switch (this.config.type) {
340
+ case "openai": {
341
+ const { createOpenAI } = await import('@ai-sdk/openai').catch(() => {
342
+ throw new Error(
343
+ "To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai"
344
+ );
345
+ });
346
+ return createOpenAI(apiKey ? { apiKey } : {})(this.model);
347
+ }
348
+ case "anthropic": {
349
+ const { createAnthropic } = await import('@ai-sdk/anthropic').catch(() => {
350
+ throw new Error(
351
+ "To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic"
352
+ );
353
+ });
354
+ return createAnthropic(apiKey ? { apiKey } : {})(this.model);
355
+ }
356
+ case "google": {
357
+ const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch(() => {
358
+ throw new Error(
359
+ "To use the Google provider, install its adapter: npm install @ai-sdk/google"
360
+ );
361
+ });
362
+ return createGoogleGenerativeAI(apiKey ? { apiKey } : {})(this.model);
363
+ }
364
+ default:
365
+ throw new Error(`Unsupported provider type: ${this.config.type}`);
366
+ }
367
+ }
368
+ };
369
+ function createProvider(config) {
370
+ if (config.type === "custom" && config.customProvider) {
371
+ return config.customProvider;
372
+ }
373
+ return new VercelAIProvider(config);
374
+ }
263
375
 
264
376
  // src/evaluators/base.ts
265
377
  var VALIDATION_LIMITS = {
@@ -268,6 +380,12 @@ var VALIDATION_LIMITS = {
268
380
  /** Maximum text length in characters (100K chars ≈ 25K tokens) */
269
381
  MAX_TEXT_LENGTH: 1e5
270
382
  };
383
+ var Provider = /* @__PURE__ */ ((Provider2) => {
384
+ Provider2["OpenAI"] = "openai";
385
+ Provider2["Google"] = "google";
386
+ Provider2["Anthropic"] = "anthropic";
387
+ return Provider2;
388
+ })(Provider || {});
271
389
  var BaseEvaluator = class {
272
390
  telemetryClient;
273
391
  logger;
@@ -285,21 +403,35 @@ var BaseEvaluator = class {
285
403
  * name: 'My Evaluator',
286
404
  * description: 'Does something useful',
287
405
  * supportedGrades: ['3', '4', '5'],
288
- * requiresGoogleKey: true,
289
- * requiresOpenAIKey: false,
406
+ * defaultProviders: [Provider.Google],
290
407
  * };
291
408
  * }
292
409
  * ```
293
410
  */
294
411
  static metadata;
412
+ /**
413
+ * @throws {ConfigurationError} If the subclass has not defined static metadata
414
+ * @throws {ConfigurationError} If modelOverride has an invalid provider or empty model
415
+ * @throws {ConfigurationError} If a required API key is missing
416
+ */
295
417
  constructor(config) {
296
418
  this.logger = createLogger(config.logger, config.logLevel ?? 2 /* WARN */);
419
+ this.validateModelOverride(config);
297
420
  this.validateApiKeys(config);
298
421
  const telemetryConfig = this.normalizeTelemetryConfig(config.telemetry);
299
422
  this.config = {
300
423
  maxRetries: config.maxRetries ?? 2,
301
- telemetry: telemetryConfig
424
+ telemetry: telemetryConfig,
425
+ modelOverride: config.modelOverride,
426
+ googleApiKey: config.googleApiKey,
427
+ openaiApiKey: config.openaiApiKey,
428
+ anthropicApiKey: config.anthropicApiKey
302
429
  };
430
+ if (config.modelOverride) {
431
+ this.logger.warn(
432
+ `modelOverride is active: using ${config.modelOverride.provider}:${config.modelOverride.model} instead of the default model. Evaluation quality may differ from recommended defaults.`
433
+ );
434
+ }
303
435
  if (this.config.telemetry.enabled) {
304
436
  this.telemetryClient = new TelemetryClient({
305
437
  endpoint: "https://api.learningcommons.org/evaluators-telemetry/v1/events",
@@ -324,21 +456,62 @@ var BaseEvaluator = class {
324
456
  return meta;
325
457
  }
326
458
  /**
327
- * Validate that required API keys are provided based on metadata
328
- * @throws {ConfigurationError} If required API keys are missing
459
+ * Validate modelOverride shape: provider must be a known Provider value and
460
+ * model must be a non-empty string.
461
+ * @throws {ConfigurationError} If the override is malformed
329
462
  */
330
- validateApiKeys(config) {
331
- if (this.metadata.requiresGoogleKey && !config.googleApiKey) {
463
+ validateModelOverride(config) {
464
+ if (!config.modelOverride) return;
465
+ const validProviders = Object.values(Provider);
466
+ if (!validProviders.includes(config.modelOverride.provider)) {
332
467
  throw new ConfigurationError(
333
- `Google API key is required for ${this.metadata.name} evaluator. Pass googleApiKey in config.`
468
+ `Invalid provider "${config.modelOverride.provider}" in modelOverride. Valid providers are: ${validProviders.join(", ")}.`
334
469
  );
335
470
  }
336
- if (this.metadata.requiresOpenAIKey && !config.openaiApiKey) {
471
+ if (!config.modelOverride.model || config.modelOverride.model.trim() === "") {
337
472
  throw new ConfigurationError(
338
- `OpenAI API key is required for ${this.metadata.name} evaluator. Pass openaiApiKey in config.`
473
+ `modelOverride.model is required. Specify the model ID for provider "${config.modelOverride.provider}".`
339
474
  );
340
475
  }
341
476
  }
477
+ /**
478
+ * Validate that the required API key is present.
479
+ * When modelOverride is set, checks the override provider's key.
480
+ * Otherwise checks the keys required by the evaluator's default providers.
481
+ * @throws {ConfigurationError} If a required key is missing
482
+ */
483
+ validateApiKeys(config) {
484
+ const keyFor = {
485
+ ["openai" /* OpenAI */]: config.openaiApiKey?.trim() || void 0,
486
+ ["google" /* Google */]: config.googleApiKey?.trim() || void 0,
487
+ ["anthropic" /* Anthropic */]: config.anthropicApiKey?.trim() || void 0
488
+ };
489
+ const humanName = {
490
+ ["openai" /* OpenAI */]: "OpenAI API key",
491
+ ["google" /* Google */]: "Google API key",
492
+ ["anthropic" /* Anthropic */]: "Anthropic API key"
493
+ };
494
+ const configKey = {
495
+ ["openai" /* OpenAI */]: "openaiApiKey",
496
+ ["google" /* Google */]: "googleApiKey",
497
+ ["anthropic" /* Anthropic */]: "anthropicApiKey"
498
+ };
499
+ if (config.modelOverride) {
500
+ if (!keyFor[config.modelOverride.provider]) {
501
+ throw new ConfigurationError(
502
+ `${humanName[config.modelOverride.provider]} is required when using modelOverride with provider "${config.modelOverride.provider}". Pass ${configKey[config.modelOverride.provider]} in config.`
503
+ );
504
+ }
505
+ return;
506
+ }
507
+ for (const provider of this.metadata.defaultProviders) {
508
+ if (!keyFor[provider]) {
509
+ throw new ConfigurationError(
510
+ `${humanName[provider]} is required for ${this.metadata.name} evaluator. Pass ${configKey[provider]} in config.`
511
+ );
512
+ }
513
+ }
514
+ }
342
515
  /**
343
516
  * Normalize telemetry config to standard format
344
517
  */
@@ -419,6 +592,33 @@ var BaseEvaluator = class {
419
592
  );
420
593
  }
421
594
  }
595
+ /**
596
+ * Create an LLM provider, honouring modelOverride if set.
597
+ * When override is active, the key for the override provider is resolved
598
+ * from the matching top-level config field (e.g. anthropicApiKey for Anthropic).
599
+ */
600
+ createConfiguredProvider(defaultType, defaultModel, defaultApiKey) {
601
+ const override = this.config.modelOverride;
602
+ if (override) {
603
+ const apiKeyFor = {
604
+ ["openai" /* OpenAI */]: this.config.openaiApiKey,
605
+ ["google" /* Google */]: this.config.googleApiKey,
606
+ ["anthropic" /* Anthropic */]: this.config.anthropicApiKey
607
+ };
608
+ return createProvider({
609
+ type: override.provider,
610
+ model: override.model,
611
+ apiKey: apiKeyFor[override.provider],
612
+ maxRetries: this.config.maxRetries
613
+ });
614
+ }
615
+ return createProvider({
616
+ type: defaultType,
617
+ model: defaultModel,
618
+ apiKey: defaultApiKey,
619
+ maxRetries: this.config.maxRetries
620
+ });
621
+ }
422
622
  /**
423
623
  * Send telemetry event to analytics service
424
624
  * Common helper for all evaluators
@@ -439,123 +639,12 @@ var BaseEvaluator = class {
439
639
  provider: params.provider,
440
640
  token_usage: params.tokenUsage,
441
641
  metadata: params.metadata,
642
+ model_override: this.config.modelOverride ? true : void 0,
442
643
  // Include input text only if recording is enabled
443
644
  input_text: this.config.telemetry.recordInputs ? params.inputText : void 0
444
645
  });
445
646
  }
446
647
  };
447
- var DEFAULT_MODELS = {
448
- openai: "gpt-4o",
449
- anthropic: "claude-sonnet-4-5-20250929",
450
- google: "gemini-2.5-pro"
451
- };
452
- var VercelAIProvider = class {
453
- constructor(config) {
454
- this.config = config;
455
- if (config.type === "custom") {
456
- throw new Error(
457
- "VercelAIProvider does not support custom type. Use config.customProvider directly."
458
- );
459
- }
460
- }
461
- /**
462
- * Generate structured output using Vercel AI SDK's generateText with output
463
- */
464
- async generateStructured(request) {
465
- const model = await this.getModel(request.model);
466
- const startTime = Date.now();
467
- const { output, usage } = await generateText({
468
- model,
469
- messages: request.messages,
470
- output: Output.object({ schema: request.schema }),
471
- temperature: request.temperature ?? 0,
472
- maxRetries: this.config.maxRetries ?? 0,
473
- ...request.maxTokens !== void 0 ? { maxTokens: request.maxTokens } : {}
474
- });
475
- return {
476
- data: output,
477
- model: request.model || this.getDefaultModel(),
478
- usage: {
479
- inputTokens: usage.inputTokens || 0,
480
- outputTokens: usage.outputTokens || 0
481
- },
482
- latencyMs: Date.now() - startTime
483
- };
484
- }
485
- /**
486
- * Generate plain text using Vercel AI SDK's generateText
487
- */
488
- async generateText(messages, temperature) {
489
- const model = await this.getModel();
490
- const startTime = Date.now();
491
- const { text, usage } = await generateText({
492
- model,
493
- messages,
494
- temperature: temperature ?? this.config.temperature ?? 0,
495
- maxRetries: this.config.maxRetries ?? 0
496
- });
497
- return {
498
- text,
499
- usage: {
500
- inputTokens: usage.inputTokens || 0,
501
- outputTokens: usage.outputTokens || 0
502
- },
503
- latencyMs: Date.now() - startTime
504
- };
505
- }
506
- /**
507
- * Get the configured language model.
508
- * Uses dynamic imports so consumers only need to install the provider packages they use.
509
- */
510
- async getModel(requestModel) {
511
- const modelId = requestModel || this.config.model || this.getDefaultModel();
512
- const apiKey = this.config.apiKey;
513
- switch (this.config.type) {
514
- case "openai": {
515
- const { createOpenAI } = await import('@ai-sdk/openai').catch(() => {
516
- throw new Error(
517
- "To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai"
518
- );
519
- });
520
- return createOpenAI(apiKey ? { apiKey } : {})(modelId);
521
- }
522
- case "anthropic": {
523
- const { createAnthropic } = await import('@ai-sdk/anthropic').catch(() => {
524
- throw new Error(
525
- "To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic"
526
- );
527
- });
528
- return createAnthropic(apiKey ? { apiKey } : {})(modelId);
529
- }
530
- case "google": {
531
- const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch(() => {
532
- throw new Error(
533
- "To use the Google provider, install its adapter: npm install @ai-sdk/google"
534
- );
535
- });
536
- return createGoogleGenerativeAI(apiKey ? { apiKey } : {})(modelId);
537
- }
538
- default:
539
- throw new Error(`Unsupported provider type: ${this.config.type}`);
540
- }
541
- }
542
- /**
543
- * Get default model for the configured provider
544
- */
545
- getDefaultModel() {
546
- const providerType = this.config.type;
547
- if (providerType === "custom") {
548
- throw new Error("Cannot get default model for custom provider type");
549
- }
550
- return DEFAULT_MODELS[providerType];
551
- }
552
- };
553
- function createProvider(config) {
554
- if (config.type === "custom" && config.customProvider) {
555
- return config.customProvider;
556
- }
557
- return new VercelAIProvider(config);
558
- }
559
648
  var TextComplexityLevel = z.enum([
560
649
  "Slightly complex",
561
650
  "Moderately complex",
@@ -757,6 +846,44 @@ function featuresToJSON(features, decimals = 1, castToInt = true) {
757
846
  }
758
847
  return JSON.stringify(payload, null, 2);
759
848
  }
849
+ var LIBRARY_ADAPTERS = {
850
+ "text-readability": {
851
+ call(fnName, text) {
852
+ const fn = textReadability[fnName];
853
+ if (typeof fn !== "function") {
854
+ throw new Error(`Function "${fnName}" not found in text-readability.`);
855
+ }
856
+ return fn.call(textReadability, text);
857
+ }
858
+ }
859
+ };
860
+ var POST_TRANSFORMS = {
861
+ round(value, { precision = 0 }) {
862
+ const factor = 10 ** precision;
863
+ return Math.round(value * factor) / factor;
864
+ }
865
+ };
866
+ function runPreprocessingStep(text, impl) {
867
+ const adapter = LIBRARY_ADAPTERS[impl.library];
868
+ if (!adapter) {
869
+ const supported = Object.keys(LIBRARY_ADAPTERS).join(", ");
870
+ throw new Error(
871
+ `Unsupported preprocessing library "${impl.library}". Supported: ${supported}.`
872
+ );
873
+ }
874
+ let result = adapter.call(impl.function, text);
875
+ if (impl.post_transform) {
876
+ const transform = POST_TRANSFORMS[impl.post_transform.type];
877
+ if (!transform) {
878
+ const supported = Object.keys(POST_TRANSFORMS).join(", ");
879
+ throw new Error(
880
+ `Unsupported post_transform type "${impl.post_transform.type}". Supported: ${supported}.`
881
+ );
882
+ }
883
+ result = transform(result, impl.post_transform);
884
+ }
885
+ return result;
886
+ }
760
887
 
761
888
  // ../../evals/prompts/vocabulary/background-knowledge.txt
762
889
  var background_knowledge_default = `
@@ -1062,32 +1189,28 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1062
1189
  name: "Vocabulary",
1063
1190
  description: "Evaluates vocabulary complexity of educational texts relative to grade level",
1064
1191
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
1065
- requiresGoogleKey: true,
1066
- requiresOpenAIKey: true
1192
+ defaultProviders: ["google" /* Google */, "openai" /* OpenAI */]
1067
1193
  };
1068
1194
  grades34ComplexityProvider;
1069
1195
  otherGradesComplexityProvider;
1070
1196
  backgroundKnowledgeProvider;
1071
1197
  constructor(config) {
1072
1198
  super(config);
1073
- this.grades34ComplexityProvider = createProvider({
1074
- type: "google",
1075
- model: "gemini-2.5-pro",
1076
- apiKey: config.googleApiKey,
1077
- maxRetries: this.config.maxRetries
1078
- });
1079
- this.otherGradesComplexityProvider = createProvider({
1080
- type: "openai",
1081
- model: "gpt-4.1-2025-04-14",
1082
- apiKey: config.openaiApiKey,
1083
- maxRetries: this.config.maxRetries
1084
- });
1085
- this.backgroundKnowledgeProvider = createProvider({
1086
- type: "openai",
1087
- model: "gpt-4o-2024-11-20",
1088
- apiKey: config.openaiApiKey,
1089
- maxRetries: this.config.maxRetries
1090
- });
1199
+ this.grades34ComplexityProvider = this.createConfiguredProvider(
1200
+ "google" /* Google */,
1201
+ "gemini-2.5-pro",
1202
+ config.googleApiKey
1203
+ );
1204
+ this.otherGradesComplexityProvider = this.createConfiguredProvider(
1205
+ "openai" /* OpenAI */,
1206
+ "gpt-4.1-2025-04-14",
1207
+ config.openaiApiKey
1208
+ );
1209
+ this.backgroundKnowledgeProvider = this.createConfiguredProvider(
1210
+ "openai" /* OpenAI */,
1211
+ "gpt-4o-2024-11-20",
1212
+ config.openaiApiKey
1213
+ );
1091
1214
  }
1092
1215
  /**
1093
1216
  * Evaluate vocabulary complexity for a given text and grade level
@@ -1096,6 +1219,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1096
1219
  * @param grade - The target grade level (3-12)
1097
1220
  * @returns Evaluation result with complexity score and detailed analysis
1098
1221
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
1222
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1099
1223
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1100
1224
  */
1101
1225
  async evaluate(text, grade) {
@@ -1107,7 +1231,9 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1107
1231
  });
1108
1232
  const startTime = Date.now();
1109
1233
  const stageDetails = [];
1110
- const complexityProviderName = grade === "3" || grade === "4" ? "google:gemini-2.5-pro" : "openai:gpt-4.1-2025-04-14";
1234
+ const complexityProviderLabel = grade === "3" || grade === "4" ? this.grades34ComplexityProvider.label : this.otherGradesComplexityProvider.label;
1235
+ const backgroundProviderLabel = this.backgroundKnowledgeProvider.label;
1236
+ const modelLabel = this.config.modelOverride ? backgroundProviderLabel : `${backgroundProviderLabel}+${complexityProviderLabel}`;
1111
1237
  try {
1112
1238
  this.validateText(text);
1113
1239
  this.validateGrade(grade, new Set(_VocabularyEvaluator.metadata.supportedGrades));
@@ -1118,7 +1244,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1118
1244
  const bgResponse = await this.getBackgroundKnowledgeAssumption(text, grade);
1119
1245
  stageDetails.push({
1120
1246
  stage: "background_knowledge",
1121
- provider: "openai:gpt-4o-2024-11-20",
1247
+ provider: backgroundProviderLabel,
1122
1248
  latency_ms: bgResponse.latencyMs,
1123
1249
  token_usage: {
1124
1250
  input_tokens: bgResponse.usage.inputTokens,
@@ -1134,7 +1260,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1134
1260
  );
1135
1261
  stageDetails.push({
1136
1262
  stage: "complexity_evaluation",
1137
- provider: complexityProviderName,
1263
+ provider: complexityProviderLabel,
1138
1264
  latency_ms: complexityResponse.latencyMs,
1139
1265
  token_usage: {
1140
1266
  input_tokens: complexityResponse.usage.inputTokens,
@@ -1150,7 +1276,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1150
1276
  score: complexityResponse.data.complexity_score,
1151
1277
  reasoning: complexityResponse.data.reasoning,
1152
1278
  metadata: {
1153
- model: `openai:gpt-4o-2024-11-20 + ${complexityProviderName}`,
1279
+ model: modelLabel,
1154
1280
  processingTimeMs: latencyMs
1155
1281
  },
1156
1282
  _internal: complexityResponse.data
@@ -1160,7 +1286,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1160
1286
  latencyMs,
1161
1287
  textLength: text.length,
1162
1288
  grade,
1163
- provider: `openai:gpt-4o-2024-11-20 + ${complexityProviderName}`,
1289
+ provider: modelLabel,
1164
1290
  tokenUsage: totalTokenUsage,
1165
1291
  metadata: {
1166
1292
  stage_details: stageDetails
@@ -1195,7 +1321,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1195
1321
  latencyMs,
1196
1322
  textLength: text.length,
1197
1323
  grade,
1198
- provider: `openai:gpt-4o-2024-11-20 + ${complexityProviderName}`,
1324
+ provider: modelLabel,
1199
1325
  tokenUsage: totalTokenUsage,
1200
1326
  errorCode: error instanceof Error ? error.name : "UnknownError",
1201
1327
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -1413,25 +1539,12 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1413
1539
  name: "Sentence Structure",
1414
1540
  description: "Evaluates sentence structure complexity based on grammatical features",
1415
1541
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
1416
- requiresGoogleKey: false,
1417
- requiresOpenAIKey: true
1542
+ defaultProviders: ["openai" /* OpenAI */]
1418
1543
  };
1419
- analysisProvider;
1420
- complexityProvider;
1544
+ provider;
1421
1545
  constructor(config) {
1422
1546
  super(config);
1423
- this.analysisProvider = createProvider({
1424
- type: "openai",
1425
- model: "gpt-4o",
1426
- apiKey: config.openaiApiKey,
1427
- maxRetries: this.config.maxRetries
1428
- });
1429
- this.complexityProvider = createProvider({
1430
- type: "openai",
1431
- model: "gpt-4o",
1432
- apiKey: config.openaiApiKey,
1433
- maxRetries: this.config.maxRetries
1434
- });
1547
+ this.provider = this.createConfiguredProvider("openai" /* OpenAI */, "gpt-4o", config.openaiApiKey);
1435
1548
  }
1436
1549
  /**
1437
1550
  * Evaluate sentence structure complexity for a given text and grade level
@@ -1440,6 +1553,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1440
1553
  * @param grade - The target grade level (3-12)
1441
1554
  * @returns Evaluation result with complexity score and detailed analysis
1442
1555
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
1556
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1443
1557
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1444
1558
  */
1445
1559
  async evaluate(text, grade) {
@@ -1461,7 +1575,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1461
1575
  const analysisResponse = await this.analyzeSentenceStructure(text);
1462
1576
  stageDetails.push({
1463
1577
  stage: "sentence_analysis",
1464
- provider: "openai:gpt-4o",
1578
+ provider: this.provider.label,
1465
1579
  latency_ms: analysisResponse.latencyMs,
1466
1580
  token_usage: {
1467
1581
  input_tokens: analysisResponse.usage.inputTokens,
@@ -1476,7 +1590,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1476
1590
  const complexityResponse = await this.classifyComplexity(features, grade, text);
1477
1591
  stageDetails.push({
1478
1592
  stage: "complexity_classification",
1479
- provider: "openai:gpt-4o",
1593
+ provider: this.provider.label,
1480
1594
  latency_ms: complexityResponse.latencyMs,
1481
1595
  token_usage: {
1482
1596
  input_tokens: complexityResponse.usage.inputTokens,
@@ -1492,7 +1606,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1492
1606
  score: complexityResponse.data.answer,
1493
1607
  reasoning: complexityResponse.data.reasoning,
1494
1608
  metadata: {
1495
- model: "openai:gpt-4o",
1609
+ model: this.provider.label,
1496
1610
  processingTimeMs: latencyMs
1497
1611
  },
1498
1612
  _internal: {
@@ -1506,7 +1620,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1506
1620
  latencyMs,
1507
1621
  textLength: text.length,
1508
1622
  grade,
1509
- provider: "openai:gpt-4o",
1623
+ provider: this.provider.label,
1510
1624
  tokenUsage: totalTokenUsage,
1511
1625
  metadata: {
1512
1626
  stage_details: stageDetails
@@ -1541,7 +1655,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1541
1655
  latencyMs,
1542
1656
  textLength: text.length,
1543
1657
  grade,
1544
- provider: "openai:gpt-4o",
1658
+ provider: this.provider.label,
1545
1659
  tokenUsage: totalTokenUsage,
1546
1660
  errorCode: error instanceof Error ? error.name : "UnknownError",
1547
1661
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -1569,7 +1683,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1569
1683
  `flesch_kincaid_grade: ${metrics.fleschKincaidGrade}`
1570
1684
  ].join("\n");
1571
1685
  const userPrompt = getUserPromptAnalysis(text, gtCountsStr);
1572
- const response = await this.analysisProvider.generateStructured({
1686
+ const response = await this.provider.generateStructured({
1573
1687
  messages: [
1574
1688
  { role: "system", content: getSystemPromptAnalysis() },
1575
1689
  { role: "user", content: userPrompt }
@@ -1591,7 +1705,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1591
1705
  async classifyComplexity(features, grade, excerpt) {
1592
1706
  const featuresJSON = featuresToJSON(features, 1, true);
1593
1707
  const userPrompt = getUserPromptComplexity(featuresJSON, grade, excerpt);
1594
- const response = await this.complexityProvider.generateStructured({
1708
+ const response = await this.provider.generateStructured({
1595
1709
  messages: [
1596
1710
  { role: "system", content: getSystemPromptComplexity() },
1597
1711
  { role: "user", content: userPrompt }
@@ -1647,18 +1761,16 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1647
1761
  description: "Determines appropriate grade level for text with scaffolding recommendations",
1648
1762
  supportedGrades: [],
1649
1763
  // No grade parameter required - evaluates what grade the text is appropriate for
1650
- requiresGoogleKey: true,
1651
- requiresOpenAIKey: false
1764
+ defaultProviders: ["google" /* Google */]
1652
1765
  };
1653
1766
  provider;
1654
1767
  constructor(config) {
1655
1768
  super(config);
1656
- this.provider = createProvider({
1657
- type: "google",
1658
- model: "gemini-2.5-pro",
1659
- apiKey: config.googleApiKey,
1660
- maxRetries: this.config.maxRetries
1661
- });
1769
+ this.provider = this.createConfiguredProvider(
1770
+ "google" /* Google */,
1771
+ "gemini-2.5-pro",
1772
+ config.googleApiKey
1773
+ );
1662
1774
  }
1663
1775
  /**
1664
1776
  * Evaluate grade level appropriateness for a given text
@@ -1666,6 +1778,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1666
1778
  * @param text - The text to evaluate
1667
1779
  * @returns Evaluation result with grade recommendations and scaffolding suggestions
1668
1780
  * @throws {ValidationError} If text is empty or too short/long
1781
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1669
1782
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1670
1783
  */
1671
1784
  async evaluate(text) {
@@ -1699,7 +1812,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1699
1812
  score: response.data.grade,
1700
1813
  reasoning: response.data.reasoning,
1701
1814
  metadata: {
1702
- model: "google:gemini-2.5-pro",
1815
+ model: this.provider.label,
1703
1816
  processingTimeMs: latencyMs
1704
1817
  },
1705
1818
  _internal: response.data
@@ -1708,7 +1821,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1708
1821
  status: "success",
1709
1822
  latencyMs,
1710
1823
  textLength: text.length,
1711
- provider: "google:gemini-2.5-pro",
1824
+ provider: this.provider.label,
1712
1825
  tokenUsage,
1713
1826
  // No metadata.stage_details for single-stage evaluator
1714
1827
  inputText: text
@@ -1733,7 +1846,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1733
1846
  status: "error",
1734
1847
  latencyMs,
1735
1848
  textLength: text.length,
1736
- provider: "google:gemini-2.5-pro",
1849
+ provider: this.provider.label,
1737
1850
  errorCode: error instanceof Error ? error.name : "UnknownError",
1738
1851
  inputText: text
1739
1852
  }).catch(() => {
@@ -1844,18 +1957,16 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1844
1957
  name: "Subject Matter Knowledge",
1845
1958
  description: "Evaluates background knowledge demands of educational texts relative to grade level",
1846
1959
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
1847
- requiresGoogleKey: true,
1848
- requiresOpenAIKey: false
1960
+ defaultProviders: ["google" /* Google */]
1849
1961
  };
1850
1962
  provider;
1851
1963
  constructor(config) {
1852
1964
  super(config);
1853
- this.provider = createProvider({
1854
- type: "google",
1855
- model: "gemini-3-flash-preview",
1856
- apiKey: config.googleApiKey,
1857
- maxRetries: this.config.maxRetries
1858
- });
1965
+ this.provider = this.createConfiguredProvider(
1966
+ "google" /* Google */,
1967
+ "gemini-3-flash-preview",
1968
+ config.googleApiKey
1969
+ );
1859
1970
  }
1860
1971
  /**
1861
1972
  * Evaluate subject matter knowledge complexity for a given text and grade level
@@ -1864,6 +1975,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1864
1975
  * @param grade - The target grade level (3-12)
1865
1976
  * @returns Evaluation result with complexity score and detailed analysis
1866
1977
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
1978
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1867
1979
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1868
1980
  */
1869
1981
  async evaluate(text, grade) {
@@ -1886,7 +1998,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1886
1998
  const response = await this.evaluateSmk(text, grade, fkScore);
1887
1999
  stageDetails.push({
1888
2000
  stage: "smk_evaluation",
1889
- provider: "google:gemini-3-flash-preview",
2001
+ provider: this.provider.label,
1890
2002
  latency_ms: response.latencyMs,
1891
2003
  token_usage: {
1892
2004
  input_tokens: response.usage.inputTokens,
@@ -1902,7 +2014,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1902
2014
  score: response.data.complexity_score,
1903
2015
  reasoning: response.data.reasoning,
1904
2016
  metadata: {
1905
- model: "google:gemini-3-flash-preview",
2017
+ model: this.provider.label,
1906
2018
  processingTimeMs: latencyMs
1907
2019
  },
1908
2020
  _internal: response.data
@@ -1912,7 +2024,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1912
2024
  latencyMs,
1913
2025
  textLength: text.length,
1914
2026
  grade,
1915
- provider: "google:gemini-3-flash-preview",
2027
+ provider: this.provider.label,
1916
2028
  tokenUsage: totalTokenUsage,
1917
2029
  metadata: {
1918
2030
  stage_details: stageDetails
@@ -1947,7 +2059,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1947
2059
  latencyMs,
1948
2060
  textLength: text.length,
1949
2061
  grade,
1950
- provider: "google:gemini-3-flash-preview",
2062
+ provider: this.provider.label,
1951
2063
  tokenUsage: totalTokenUsage,
1952
2064
  errorCode: error instanceof Error ? error.name : "UnknownError",
1953
2065
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -2051,18 +2163,16 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2051
2163
  name: "Conventionality",
2052
2164
  description: "Evaluates how explicit, literal, and straightforward a text's meaning is relative to grade level",
2053
2165
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
2054
- requiresGoogleKey: true,
2055
- requiresOpenAIKey: false
2166
+ defaultProviders: ["google" /* Google */]
2056
2167
  };
2057
2168
  provider;
2058
2169
  constructor(config) {
2059
2170
  super(config);
2060
- this.provider = createProvider({
2061
- type: "google",
2062
- model: "gemini-3-flash-preview",
2063
- apiKey: config.googleApiKey,
2064
- maxRetries: this.config.maxRetries
2065
- });
2171
+ this.provider = this.createConfiguredProvider(
2172
+ "google" /* Google */,
2173
+ "gemini-3-flash-preview",
2174
+ config.googleApiKey
2175
+ );
2066
2176
  }
2067
2177
  /**
2068
2178
  * Evaluate conventionality complexity for a given text and grade level
@@ -2071,6 +2181,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2071
2181
  * @param grade - The target grade level (3-12)
2072
2182
  * @returns Evaluation result with complexity score and detailed analysis
2073
2183
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
2184
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
2074
2185
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
2075
2186
  */
2076
2187
  async evaluate(text, grade) {
@@ -2093,7 +2204,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2093
2204
  const response = await this.evaluateConventionality(text, grade, fkScore);
2094
2205
  stageDetails.push({
2095
2206
  stage: "conventionality_evaluation",
2096
- provider: "google:gemini-3-flash-preview",
2207
+ provider: this.provider.label,
2097
2208
  latency_ms: response.latencyMs,
2098
2209
  token_usage: {
2099
2210
  input_tokens: response.usage.inputTokens,
@@ -2109,7 +2220,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2109
2220
  score: response.data.complexity_score,
2110
2221
  reasoning: response.data.reasoning,
2111
2222
  metadata: {
2112
- model: "google:gemini-3-flash-preview",
2223
+ model: this.provider.label,
2113
2224
  processingTimeMs: latencyMs
2114
2225
  },
2115
2226
  _internal: response.data
@@ -2119,7 +2230,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2119
2230
  latencyMs,
2120
2231
  textLength: text.length,
2121
2232
  grade,
2122
- provider: "google:gemini-3-flash-preview",
2233
+ provider: this.provider.label,
2123
2234
  tokenUsage: totalTokenUsage,
2124
2235
  metadata: {
2125
2236
  stage_details: stageDetails
@@ -2154,7 +2265,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2154
2265
  latencyMs,
2155
2266
  textLength: text.length,
2156
2267
  grade,
2157
- provider: "google:gemini-3-flash-preview",
2268
+ provider: this.provider.label,
2158
2269
  tokenUsage: totalTokenUsage,
2159
2270
  errorCode: error instanceof Error ? error.name : "UnknownError",
2160
2271
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -2186,6 +2297,276 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2186
2297
  };
2187
2298
  }
2188
2299
  };
2300
+ var PurposeOutputSchema = z.object({ "complexity_score": z.enum(["slightly_complex", "moderately_complex", "very_complex", "exceedingly_complex", "more_context_needed"]).describe("The Purpose complexity level for the target grade."), "reasoning": z.string().describe("A high-level summary of why the text is at this complexity level for the target grade."), "details": z.object({ "detailed_summary": z.array(z.object({ "factor": z.string().describe("The specific text complexity factor identified."), "description": z.string().describe("How this factor manifests in the text."), "effect_on_complexity_dimension": z.string().describe("How this factor affects the reader's ability to understand the text's specific complexity dimension.") }).strict()).describe("Individual complexity factors with descriptions and their effects."), "adjustment_and_scaffolding": z.array(z.object({ "scaffolding_need": z.string().describe("The complexity factor that requires scaffolding."), "suggestion": z.string().describe("A specific instructional strategy to support students with this factor.") }).strict()).describe("Scaffolding strategies to make the text accessible at the target grade."), "recommended_use_cases": z.array(z.object({ "opportunity": z.string().describe("An instructional opportunity related to the text."), "suggestion": z.string().describe("A specific way to leverage this text for that instructional purpose.") }).strict()).describe("Additional instructional opportunities for using this text.") }).strict().describe("Practical instructional details including scaffolding strategies and recommended use cases.") }).strict();
2301
+
2302
+ // ../../evals/prompts/purpose/system.txt
2303
+ var system_default4 = '\n Role\n You are an expert reading assessment evaluator. Your task is to determine the Text Complexity of a given passage based exclusively on the Purpose dimension of the qualitative measures rubric.\n\n Task Details\n You will be provided with an informational or literary `text`, along with its `grade_level` and `fk_score` (Flesch-Kincaid). You must analyze the text and determine how difficult it is for a reader to identify the author\'s purpose. \n\n Crucially, you must distinguish between the text\'s *topic* (what it is about) and its *purpose* (why the author wrote it). \n\n Rubric: Purpose Complexity\n Exceedingly Complex: Subtle and intricate, difficult to determine; includes many theoretical or abstract elements.\n Very Complex: Implicit or subtle but fairly easy to infer; more theoretical or abstract than concrete.\n Moderately Complex: Implied but easy to identify based upon context or source.\n Slightly Complex: Explicitly stated, clear, concrete, narrowly focused.\n More Context Needed: The text is a fragment or lacks necessary introductory context, making the true purpose impossible to determine accurately without external background knowledge.\n\n Expert Rules for Evaluating Purpose\n Based on expert consensus and historical grading corrections, you must apply the following heuristics:\n\n 1. The "Slightly Complex" Benchmark (Straightforward and Explicit)\n A text is Slightly Complex if its purpose is explicitly stated or if its informative intent is straightforward, clear, concrete, and directly answers what the text is immediately about. If the text opens by clearly identifying a concrete topic (e.g., "Pins are made of either brass or iron wire") and rigidly follows through by explaining factual, practical information or a process (like manufacturing steps or geographic facts), the purpose is considered explicit and straightforward. It does *not* require a literal statement like "The purpose of this text is to..." as long as the delivery of information is direct, clear, and unadorned by persuasive elements or complex framing.\n\n 2. Moderately Complex via Guiding Questions & Inquiry Formats\n If a text begins with a general introduction and uses guiding questions (e.g., "Have you ever wondered how clouds are formed?") to transition into an explanation, the purpose is implied rather than explicitly stated upfront. Because the reader must recognize the question as the pivot point for the author\'s intent, it is Moderately Complex.\n\n 3. Moderately Complex via Multiple Distinct Informational Goals\n If a text covers a broad topic but jumps between several distinct scientific or informational objectives without an overarching framing device or explicit thesis (e.g., talking about measuring ice sheets, then mapping, then finding meteorites), the reader must synthesize these diverse facts to recognize the broader purpose, making it Moderately Complex.\n\n 4. Moderately Complex via Arguments Disguised as Information\n If an author is arguing a specific point, correcting a misconception, or defending a stance, but the text could initially be mistaken by students as purely informative factual text, it is Moderately Complex. The reader must infer the persuasive intent or argumentative purpose beneath the informative tone.\n\n 5. "More Context Needed" for Fragments\n If a text is a fragment missing a crucial introduction or context, and identifying the author\'s purpose beyond a simple surface-level description would be exceptionally difficult for a reader in the target grade level without that external background, score it as `more_context_needed`. \n\n Output Format\n Provide your evaluation in the following structure:\n reasoning:\n - Surface Analysis: Identify if the text clearly identifies its topic and delivers straightforward facts, or if it utilizes structural cues, titles, or direct thesis statements.\n - Subtlety & Framing: Is the informative purpose straightforward and concrete? Does it use guiding questions? Is it an argument disguised as pure information? Are there multiple distinct informational goals requiring synthesis?\n - Context Check: Is this text a fragment missing crucial context that obscures the deeper purpose for the target grade level?\n - Rubric Alignment: Explain how the text aligns with the specific language of the rubric, explicitly referencing the expert rules above. Justify why it isn\'t one level simpler or more complex.\n\n answer:\n - complexity_score: (slightly_complex, moderately_complex, very_complex, exceedingly_complex, more_context_needed)\n - reasoning: A brief summary of your final decision.\n - details: Structured breakdown of PurposeDetails including detailed_summary, adjustment_and_scaffolding, and recommended_use_cases.\n';
2304
+
2305
+ // ../../evals/prompts/purpose/user.txt
2306
+ var user_default4 = "Analyze:\nText: {text}\nGrade: {grade_level}\nFK Score: {fk_score}";
2307
+
2308
+ // ../../evals/prompts/purpose/config.json
2309
+ var config_default = {
2310
+ evaluator: {
2311
+ id: "literacy.gla.purpose",
2312
+ name: "Purpose Dimension Text Complexity Evaluator",
2313
+ description: "Evaluates the Purpose dimension of qualitative text complexity for K-12 reading assessment, producing a 5-level rubric rating with structured pedagogical detail."
2314
+ },
2315
+ preprocessing: [
2316
+ {
2317
+ id: "fk_score",
2318
+ kind: "flesch_kincaid_grade",
2319
+ description: "Compute the Flesch-Kincaid Grade Level for the input text and bind it to {fk_score} in the prompt.",
2320
+ input: "text",
2321
+ output: "fk_score",
2322
+ implementation: {
2323
+ python: {
2324
+ library: "textstat",
2325
+ function: "flesch_kincaid_grade",
2326
+ post_transform: {
2327
+ type: "round",
2328
+ precision: 2
2329
+ }
2330
+ },
2331
+ typescript: {
2332
+ library: "text-readability",
2333
+ function: "fleschKincaidGrade",
2334
+ post_transform: {
2335
+ type: "round",
2336
+ precision: 2
2337
+ }
2338
+ }
2339
+ }
2340
+ }
2341
+ ],
2342
+ steps: [
2343
+ {
2344
+ id: "evaluate_purpose",
2345
+ description: "Single-call LLM step that produces the EvaluatorOutput JSON.",
2346
+ prompt: {
2347
+ type: "chat",
2348
+ messages: [
2349
+ {
2350
+ role: "system",
2351
+ source_path: "system.txt",
2352
+ sha256: "745b95b7d54dc845b99363c9d3360355381883c22a5f6a0f305d7349cae38a54"
2353
+ },
2354
+ {
2355
+ role: "user",
2356
+ source_path: "user.txt",
2357
+ sha256: "cd8e6347db1a55d104e34436f8f66e833bd6583645d4786a554aaefdd26479b2"
2358
+ }
2359
+ ],
2360
+ placeholders: {
2361
+ text: {
2362
+ required: true,
2363
+ source: "input"
2364
+ },
2365
+ grade_level: {
2366
+ required: true,
2367
+ source: "input"
2368
+ },
2369
+ fk_score: {
2370
+ required: true,
2371
+ source: "preprocessing.fk_score"
2372
+ }
2373
+ }
2374
+ },
2375
+ model: {
2376
+ provider: "google",
2377
+ name: "gemini-3-flash-preview"
2378
+ },
2379
+ generation: {
2380
+ temperature: 0
2381
+ },
2382
+ parser: {
2383
+ kind: "structured_output"
2384
+ },
2385
+ output_binding: "formatted_output"
2386
+ }
2387
+ ]};
2388
+
2389
+ // src/prompts/purpose/index.ts
2390
+ var STEP_ID = `evaluate_${config_default.evaluator.id.split(".").pop()}`;
2391
+ var _step = config_default.steps.find((s) => s.id === STEP_ID);
2392
+ if (!_step) throw new Error(`Step "${STEP_ID}" not found in purpose config.json`);
2393
+ var PLACEHOLDER_KEYS = Object.keys(_step.prompt.placeholders);
2394
+ function applyPlaceholders(template, inputs) {
2395
+ return PLACEHOLDER_KEYS.reduce(
2396
+ (text, key) => key in inputs ? text.replaceAll(`{${key}}`, inputs[key]) : text,
2397
+ template
2398
+ );
2399
+ }
2400
+ function getSystemPrompt5(inputs) {
2401
+ return applyPlaceholders(system_default4, inputs);
2402
+ }
2403
+ function getUserPrompt5(inputs) {
2404
+ return applyPlaceholders(user_default4, inputs);
2405
+ }
2406
+
2407
+ // ../../evals/prompts/purpose/input_schema.json
2408
+ var input_schema_default = {
2409
+ properties: {
2410
+ grade_level: {
2411
+ minimum: 3,
2412
+ maximum: 12}
2413
+ }
2414
+ };
2415
+
2416
+ // src/evaluators/purpose.ts
2417
+ var STEP_ID2 = `evaluate_${config_default.evaluator.id.split(".").pop()}`;
2418
+ var _step2 = config_default.steps.find((s) => s.id === STEP_ID2);
2419
+ if (!_step2) throw new Error(`Step "${STEP_ID2}" not found in purpose config.json`);
2420
+ var STEP = _step2;
2421
+ var GRADE_MIN = input_schema_default.properties.grade_level.minimum;
2422
+ var GRADE_MAX = input_schema_default.properties.grade_level.maximum;
2423
+ var SUPPORTED_GRADES = Array.from({ length: GRADE_MAX - GRADE_MIN + 1 }, (_, i) => String(GRADE_MIN + i));
2424
+ var COMPLEXITY_SCORE_DISPLAY = {
2425
+ "slightly_complex": "Slightly complex",
2426
+ "moderately_complex": "Moderately complex",
2427
+ "very_complex": "Very complex",
2428
+ "exceedingly_complex": "Exceedingly complex",
2429
+ "more_context_needed": "More context needed"
2430
+ };
2431
+ var PurposeEvaluator = class _PurposeEvaluator extends BaseEvaluator {
2432
+ static metadata = {
2433
+ id: config_default.evaluator.id,
2434
+ name: config_default.evaluator.name,
2435
+ description: config_default.evaluator.description,
2436
+ supportedGrades: SUPPORTED_GRADES,
2437
+ defaultProviders: ["google" /* Google */]
2438
+ };
2439
+ static TEMPERATURE = STEP.generation.temperature;
2440
+ static computeFkScore(text) {
2441
+ const fkStep = config_default.preprocessing.find((p) => p.id === "fk_score");
2442
+ if (!fkStep) throw new Error("fk_score preprocessing step not found in purpose config.json");
2443
+ return runPreprocessingStep(text, fkStep.implementation.typescript);
2444
+ }
2445
+ provider;
2446
+ constructor(config) {
2447
+ super(config);
2448
+ this.provider = this.createConfiguredProvider(
2449
+ "google" /* Google */,
2450
+ STEP.model.name,
2451
+ config.googleApiKey
2452
+ );
2453
+ }
2454
+ /**
2455
+ * Evaluate purpose complexity for a given text and grade level
2456
+ *
2457
+ * @param text - The text to evaluate
2458
+ * @param grade - The target grade level (3-12)
2459
+ * @returns Evaluation result with complexity score and detailed analysis
2460
+ * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
2461
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
2462
+ * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
2463
+ */
2464
+ async evaluate(text, grade) {
2465
+ this.logger.info("Starting Purpose evaluation", {
2466
+ evaluator: _PurposeEvaluator.metadata.id,
2467
+ operation: "evaluate",
2468
+ grade,
2469
+ textLength: text.length
2470
+ });
2471
+ const startTime = Date.now();
2472
+ const stageDetails = [];
2473
+ try {
2474
+ this.validateText(text);
2475
+ const gradeNum = this.parseAndValidateGrade(grade);
2476
+ const fkScore = _PurposeEvaluator.computeFkScore(text);
2477
+ const inputs = {
2478
+ text,
2479
+ grade_level: String(gradeNum),
2480
+ fk_score: String(fkScore)
2481
+ };
2482
+ const response = await this.callLLM(inputs);
2483
+ const latencyMs = Date.now() - startTime;
2484
+ const tokenUsage = {
2485
+ input_tokens: response.usage.inputTokens,
2486
+ output_tokens: response.usage.outputTokens
2487
+ };
2488
+ stageDetails.push({
2489
+ stage: STEP.id,
2490
+ provider: this.provider.label,
2491
+ latency_ms: response.latencyMs,
2492
+ token_usage: tokenUsage
2493
+ });
2494
+ const result = {
2495
+ score: COMPLEXITY_SCORE_DISPLAY[response.data.complexity_score],
2496
+ reasoning: response.data.reasoning,
2497
+ metadata: {
2498
+ model: this.provider.label,
2499
+ processingTimeMs: latencyMs
2500
+ },
2501
+ _internal: response.data
2502
+ };
2503
+ this.sendTelemetry({
2504
+ status: "success",
2505
+ latencyMs,
2506
+ textLength: text.length,
2507
+ grade: String(gradeNum),
2508
+ provider: this.provider.label,
2509
+ tokenUsage,
2510
+ metadata: { stage_details: stageDetails },
2511
+ inputText: text
2512
+ }).catch(() => void 0);
2513
+ this.logger.info("Purpose evaluation completed successfully", {
2514
+ evaluator: _PurposeEvaluator.metadata.id,
2515
+ operation: "evaluate",
2516
+ grade: gradeNum,
2517
+ score: result.score,
2518
+ processingTimeMs: latencyMs
2519
+ });
2520
+ return result;
2521
+ } catch (error) {
2522
+ const latencyMs = Date.now() - startTime;
2523
+ this.logger.error("Purpose evaluation failed", {
2524
+ evaluator: _PurposeEvaluator.metadata.id,
2525
+ operation: "evaluate",
2526
+ grade,
2527
+ error: error instanceof Error ? error : void 0,
2528
+ processingTimeMs: latencyMs
2529
+ });
2530
+ const tokenUsage = stageDetails.length > 0 ? {
2531
+ input_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.input_tokens ?? 0), 0),
2532
+ output_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.output_tokens ?? 0), 0)
2533
+ } : void 0;
2534
+ this.sendTelemetry({
2535
+ status: "error",
2536
+ latencyMs,
2537
+ textLength: text.length,
2538
+ grade: String(grade),
2539
+ provider: this.provider.label,
2540
+ tokenUsage,
2541
+ errorCode: error instanceof Error ? error.name : "UnknownError",
2542
+ metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
2543
+ inputText: text
2544
+ }).catch(() => void 0);
2545
+ if (error instanceof ValidationError) throw error;
2546
+ throw wrapProviderError(error, "Purpose evaluation failed");
2547
+ }
2548
+ }
2549
+ parseAndValidateGrade(grade) {
2550
+ const num = Number(grade.trim());
2551
+ if (!Number.isInteger(num) || num < GRADE_MIN || num > GRADE_MAX) {
2552
+ throw new ValidationError(
2553
+ `Invalid grade "${grade}". Purpose evaluator supports integer grades ${GRADE_MIN}\u2013${GRADE_MAX}.`
2554
+ );
2555
+ }
2556
+ return num;
2557
+ }
2558
+ async callLLM(inputs) {
2559
+ const response = await this.provider.generateStructured({
2560
+ messages: [
2561
+ { role: "system", content: getSystemPrompt5(inputs) },
2562
+ { role: "user", content: getUserPrompt5(inputs) }
2563
+ ],
2564
+ schema: PurposeOutputSchema,
2565
+ temperature: _PurposeEvaluator.TEMPERATURE
2566
+ });
2567
+ return { data: response.data, usage: response.usage, latencyMs: response.latencyMs };
2568
+ }
2569
+ };
2189
2570
 
2190
2571
  // src/batch/evaluator.ts
2191
2572
  var EVALUATOR_MAP = /* @__PURE__ */ new Map([
@@ -2193,19 +2574,21 @@ var EVALUATOR_MAP = /* @__PURE__ */ new Map([
2193
2574
  [SmkEvaluator.metadata.id, SmkEvaluator],
2194
2575
  [VocabularyEvaluator.metadata.id, VocabularyEvaluator],
2195
2576
  [SentenceStructureEvaluator.metadata.id, SentenceStructureEvaluator],
2196
- [ConventionalityEvaluator.metadata.id, ConventionalityEvaluator]
2577
+ [ConventionalityEvaluator.metadata.id, ConventionalityEvaluator],
2578
+ [PurposeEvaluator.metadata.id, PurposeEvaluator]
2197
2579
  ]);
2198
2580
  var EVALUATOR_GROUPS = [
2199
2581
  {
2200
2582
  id: "text-complexity",
2201
2583
  name: "Text Complexity Analysis",
2202
- description: "Evaluates vocabulary complexity, sentence structure, subject matter knowledge, conventionality, and grade-level appropriateness",
2584
+ description: "Evaluates all dimensions of the Qualitative Text Complexity rubric",
2203
2585
  evaluatorIds: [
2204
2586
  GradeLevelAppropriatenessEvaluator.metadata.id,
2205
2587
  SmkEvaluator.metadata.id,
2206
2588
  VocabularyEvaluator.metadata.id,
2207
2589
  SentenceStructureEvaluator.metadata.id,
2208
- ConventionalityEvaluator.metadata.id
2590
+ ConventionalityEvaluator.metadata.id,
2591
+ PurposeEvaluator.metadata.id
2209
2592
  ],
2210
2593
  requiresGoogleKey: true,
2211
2594
  requiresOpenAIKey: true,
@@ -3296,6 +3679,8 @@ var COMPLEXITY_SCORE_MAP = {
3296
3679
  "moderately complex": 2,
3297
3680
  "very complex": 3,
3298
3681
  "exceedingly complex": 4
3682
+ // 'more context needed' has no numeric equivalent — rows with this score appear as N/A
3683
+ // in individual results and are excluded from aggregate stats, same as failed evaluations.
3299
3684
  };
3300
3685
  function evaluatorDisplayName(id) {
3301
3686
  return id.split("-").map((w) => w.charAt(0).toUpperCase() + w.slice(1)).join(" ");
@@ -3350,7 +3735,8 @@ function groupResultsByRow(results) {
3350
3735
  return grouped;
3351
3736
  }
3352
3737
  function formatEvaluatorPrefix(evaluatorId) {
3353
- return evaluatorId.replace(/-/g, "_");
3738
+ const slug = evaluatorId.includes(".") ? evaluatorId.split(".").pop() : evaluatorId;
3739
+ return slug.replace(/-/g, "_");
3354
3740
  }
3355
3741
  function escapeCSV(field) {
3356
3742
  if (field.includes(",") || field.includes('"') || field.includes("\n")) {