@learning-commons/evaluators 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/batch/cli.js CHANGED
@@ -13,6 +13,7 @@ import { generateText, Output } from 'ai';
13
13
  import { z } from 'zod';
14
14
  import nlp from 'compromise';
15
15
  import { syllable } from 'syllable';
16
+ import textReadability from 'text-readability';
16
17
  import { parse } from 'csv-parse/sync';
17
18
 
18
19
  // src/telemetry/client.ts
@@ -174,8 +175,9 @@ var TimeoutError = class extends APIError {
174
175
  function parseProviderError(error) {
175
176
  if (error instanceof Error) {
176
177
  const message = error.message;
178
+ const err = error;
177
179
  const statusMatch = message.match(/\b(4\d{2}|5\d{2})\b/);
178
- const statusCode = statusMatch ? parseInt(statusMatch[1]) : void 0;
180
+ const statusCode = err.statusCode ?? err.status ?? (statusMatch ? parseInt(statusMatch[1]) : void 0);
179
181
  return {
180
182
  message,
181
183
  statusCode,
@@ -188,6 +190,11 @@ function parseProviderError(error) {
188
190
  }
189
191
  function wrapProviderError(error, defaultMessage = "API request failed") {
190
192
  const { message, statusCode, code } = parseProviderError(error);
193
+ if (statusCode === 404 || statusCode === 400 && /\bmodel\b.*(not found|does not exist|invalid)/i.test(message)) {
194
+ return new ConfigurationError(
195
+ `Model not found or invalid: ${message}. Check the model ID passed to the provider.`
196
+ );
197
+ }
191
198
  if (statusCode === 401 || statusCode === 403) {
192
199
  return new AuthenticationError(
193
200
  message.includes("API key") ? message : "Invalid API key",
@@ -262,6 +269,111 @@ function createLogger(customLogger, level = 2 /* WARN */) {
262
269
  }
263
270
  return new ConsoleLogger(level);
264
271
  }
272
/**
 * LLM provider backed by the Vercel AI SDK's `generateText`.
 *
 * One instance is bound to a single provider type and model ID. The provider
 * adapter package is loaded lazily (dynamic import) the first time a model is
 * requested, so consumers only need to install the adapters they actually use.
 */
var VercelAIProvider = class {
  /** Identifier of this provider in the form `<type>:<model>`. */
  label;
  /** Model ID handed to the provider adapter. */
  model;

  /**
   * @param config - Provider configuration. Fields read by this class:
   *   `type`, `model`, `apiKey`, `maxRetries`, `temperature`.
   * @throws {Error} If `config.type` is "custom", or `config.model` is missing,
   *   not a string, or blank.
   */
  constructor(config) {
    this.config = config;
    if (config.type === "custom") {
      throw new Error(
        "VercelAIProvider does not support custom type. Use config.customProvider directly."
      );
    }
    // A non-string model would otherwise fail with an opaque TypeError on .trim().
    if (typeof config.model !== "string" || config.model.trim() === "") {
      throw new Error(
        `model is required for VercelAIProvider (type: "${config.type}"). No default is assumed.`
      );
    }
    this.model = config.model;
    this.label = `${config.type}:${config.model}`;
  }

  /**
   * Generate structured output using Vercel AI SDK's generateText with output.
   *
   * @param request - Reads `messages`, `schema`, and the optional `temperature`
   *   and `maxTokens`.
   * @returns `{ data, model, usage: { inputTokens, outputTokens }, latencyMs }`
   */
  async generateStructured(request) {
    const model = await this.getModel();
    const startTime = Date.now();
    const { output, usage } = await generateText({
      model,
      messages: request.messages,
      output: Output.object({ schema: request.schema }),
      // Same precedence as generateText(): request, then provider config, then 0.
      temperature: request.temperature ?? this.config.temperature ?? 0,
      maxRetries: this.config.maxRetries ?? 0,
      // The SDK generation that reports usage.inputTokens/outputTokens names the
      // output limit `maxOutputTokens`; `maxTokens` is not a recognised setting there.
      ...(request.maxTokens !== void 0 ? { maxOutputTokens: request.maxTokens } : {})
    });
    return {
      data: output,
      model: this.model,
      usage: {
        inputTokens: usage.inputTokens || 0,
        outputTokens: usage.outputTokens || 0
      },
      latencyMs: Date.now() - startTime
    };
  }

  /**
   * Generate plain text using Vercel AI SDK's generateText.
   *
   * @param messages - Chat messages forwarded unchanged to the SDK.
   * @param temperature - Optional; falls back to `config.temperature`, then 0.
   * @returns `{ text, usage: { inputTokens, outputTokens }, latencyMs }`
   */
  async generateText(messages, temperature) {
    const model = await this.getModel();
    const startTime = Date.now();
    const { text, usage } = await generateText({
      model,
      messages,
      temperature: temperature ?? this.config.temperature ?? 0,
      maxRetries: this.config.maxRetries ?? 0
    });
    return {
      text,
      usage: {
        inputTokens: usage.inputTokens || 0,
        outputTokens: usage.outputTokens || 0
      },
      latencyMs: Date.now() - startTime
    };
  }

  /**
   * Get the configured language model.
   * Uses dynamic imports so consumers only need to install the provider packages they use.
   *
   * @throws {Error} If the adapter package cannot be imported (original failure
   *   attached as `cause`) or `config.type` is not a supported provider.
   */
  async getModel() {
    const apiKey = this.config.apiKey;
    // Without an explicit key, an empty options object is passed to the adapter factory.
    const adapterOptions = apiKey ? { apiKey } : {};
    switch (this.config.type) {
      case "openai": {
        const { createOpenAI } = await import("@ai-sdk/openai").catch((cause) => {
          throw new Error(
            "To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai",
            { cause }
          );
        });
        return createOpenAI(adapterOptions)(this.model);
      }
      case "anthropic": {
        const { createAnthropic } = await import("@ai-sdk/anthropic").catch((cause) => {
          throw new Error(
            "To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic",
            { cause }
          );
        });
        return createAnthropic(adapterOptions)(this.model);
      }
      case "google": {
        const { createGoogleGenerativeAI } = await import("@ai-sdk/google").catch((cause) => {
          throw new Error(
            "To use the Google provider, install its adapter: npm install @ai-sdk/google",
            { cause }
          );
        });
        return createGoogleGenerativeAI(adapterOptions)(this.model);
      }
      default:
        throw new Error(`Unsupported provider type: ${this.config.type}`);
    }
  }
};
371
/**
 * Resolve the LLM provider for a configuration.
 *
 * @param config - Provider configuration. With `type: "custom"` the caller's
 *   own `customProvider` is returned unchanged; any other type is wrapped in a
 *   VercelAIProvider.
 * @returns The provider instance to use.
 * @throws {Error} If `type` is "custom" but no `customProvider` was supplied,
 *   or if VercelAIProvider rejects the configuration.
 */
function createProvider(config) {
  if (config.type === "custom") {
    // Fail here with a direct message rather than falling through to
    // VercelAIProvider, whose error points at the field that is missing.
    if (!config.customProvider) {
      throw new Error(
        'Provider type "custom" requires config.customProvider to be set.'
      );
    }
    return config.customProvider;
  }
  return new VercelAIProvider(config);
}
265
377
 
266
378
  // src/evaluators/base.ts
267
379
  var VALIDATION_LIMITS = {
@@ -270,6 +382,12 @@ var VALIDATION_LIMITS = {
270
382
  /** Maximum text length in characters (100K chars ≈ 25K tokens) */
271
383
  MAX_TEXT_LENGTH: 1e5
272
384
  };
385
/**
 * Identifiers of the built-in LLM providers.
 * Frozen because the values double as the whitelist for `modelOverride.provider`
 * validation and must not be altered at runtime.
 */
var Provider = /* @__PURE__ */ Object.freeze({
  OpenAI: "openai",
  Google: "google",
  Anthropic: "anthropic"
});
273
391
  var BaseEvaluator = class {
274
392
  telemetryClient;
275
393
  logger;
@@ -287,21 +405,35 @@ var BaseEvaluator = class {
287
405
  * name: 'My Evaluator',
288
406
  * description: 'Does something useful',
289
407
  * supportedGrades: ['3', '4', '5'],
290
- * requiresGoogleKey: true,
291
- * requiresOpenAIKey: false,
408
+ * defaultProviders: [Provider.Google],
292
409
  * };
293
410
  * }
294
411
  * ```
295
412
  */
296
413
  static metadata;
414
+ /**
415
+ * @throws {ConfigurationError} If the subclass has not defined static metadata
416
+ * @throws {ConfigurationError} If modelOverride has an invalid provider or empty model
417
+ * @throws {ConfigurationError} If a required API key is missing
418
+ */
297
419
  constructor(config) {
298
420
  this.logger = createLogger(config.logger, config.logLevel ?? 2 /* WARN */);
421
+ this.validateModelOverride(config);
299
422
  this.validateApiKeys(config);
300
423
  const telemetryConfig = this.normalizeTelemetryConfig(config.telemetry);
301
424
  this.config = {
302
425
  maxRetries: config.maxRetries ?? 2,
303
- telemetry: telemetryConfig
426
+ telemetry: telemetryConfig,
427
+ modelOverride: config.modelOverride,
428
+ googleApiKey: config.googleApiKey,
429
+ openaiApiKey: config.openaiApiKey,
430
+ anthropicApiKey: config.anthropicApiKey
304
431
  };
432
+ if (config.modelOverride) {
433
+ this.logger.warn(
434
+ `modelOverride is active: using ${config.modelOverride.provider}:${config.modelOverride.model} instead of the default model. Evaluation quality may differ from recommended defaults.`
435
+ );
436
+ }
305
437
  if (this.config.telemetry.enabled) {
306
438
  this.telemetryClient = new TelemetryClient({
307
439
  endpoint: "https://api.learningcommons.org/evaluators-telemetry/v1/events",
@@ -326,21 +458,62 @@ var BaseEvaluator = class {
326
458
  return meta;
327
459
  }
328
460
  /**
329
- * Validate that required API keys are provided based on metadata
330
- * @throws {ConfigurationError} If required API keys are missing
461
+ * Validate modelOverride shape: provider must be a known Provider value and
462
+ * model must be a non-empty string.
463
+ * @throws {ConfigurationError} If the override is malformed
331
464
  */
332
- validateApiKeys(config) {
333
- if (this.metadata.requiresGoogleKey && !config.googleApiKey) {
465
+ validateModelOverride(config) {
466
+ if (!config.modelOverride) return;
467
+ const validProviders = Object.values(Provider);
468
+ if (!validProviders.includes(config.modelOverride.provider)) {
334
469
  throw new ConfigurationError(
335
- `Google API key is required for ${this.metadata.name} evaluator. Pass googleApiKey in config.`
470
+ `Invalid provider "${config.modelOverride.provider}" in modelOverride. Valid providers are: ${validProviders.join(", ")}.`
336
471
  );
337
472
  }
338
- if (this.metadata.requiresOpenAIKey && !config.openaiApiKey) {
473
+ if (!config.modelOverride.model || config.modelOverride.model.trim() === "") {
339
474
  throw new ConfigurationError(
340
- `OpenAI API key is required for ${this.metadata.name} evaluator. Pass openaiApiKey in config.`
475
+ `modelOverride.model is required. Specify the model ID for provider "${config.modelOverride.provider}".`
341
476
  );
342
477
  }
343
478
  }
479
+ /**
480
+ * Validate that the required API key is present.
481
+ * When modelOverride is set, checks the override provider's key.
482
+ * Otherwise checks the keys required by the evaluator's default providers.
483
+ * @throws {ConfigurationError} If a required key is missing
484
+ */
485
+ validateApiKeys(config) {
486
+ const keyFor = {
487
+ ["openai" /* OpenAI */]: config.openaiApiKey?.trim() || void 0,
488
+ ["google" /* Google */]: config.googleApiKey?.trim() || void 0,
489
+ ["anthropic" /* Anthropic */]: config.anthropicApiKey?.trim() || void 0
490
+ };
491
+ const humanName = {
492
+ ["openai" /* OpenAI */]: "OpenAI API key",
493
+ ["google" /* Google */]: "Google API key",
494
+ ["anthropic" /* Anthropic */]: "Anthropic API key"
495
+ };
496
+ const configKey = {
497
+ ["openai" /* OpenAI */]: "openaiApiKey",
498
+ ["google" /* Google */]: "googleApiKey",
499
+ ["anthropic" /* Anthropic */]: "anthropicApiKey"
500
+ };
501
+ if (config.modelOverride) {
502
+ if (!keyFor[config.modelOverride.provider]) {
503
+ throw new ConfigurationError(
504
+ `${humanName[config.modelOverride.provider]} is required when using modelOverride with provider "${config.modelOverride.provider}". Pass ${configKey[config.modelOverride.provider]} in config.`
505
+ );
506
+ }
507
+ return;
508
+ }
509
+ for (const provider of this.metadata.defaultProviders) {
510
+ if (!keyFor[provider]) {
511
+ throw new ConfigurationError(
512
+ `${humanName[provider]} is required for ${this.metadata.name} evaluator. Pass ${configKey[provider]} in config.`
513
+ );
514
+ }
515
+ }
516
+ }
344
517
  /**
345
518
  * Normalize telemetry config to standard format
346
519
  */
@@ -421,6 +594,33 @@ var BaseEvaluator = class {
421
594
  );
422
595
  }
423
596
  }
597
+ /**
598
+ * Create an LLM provider, honouring modelOverride if set.
599
+ * When override is active, the key for the override provider is resolved
600
+ * from the matching top-level config field (e.g. anthropicApiKey for Anthropic).
601
+ */
602
+ createConfiguredProvider(defaultType, defaultModel, defaultApiKey) {
603
+ const override = this.config.modelOverride;
604
+ if (override) {
605
+ const apiKeyFor = {
606
+ ["openai" /* OpenAI */]: this.config.openaiApiKey,
607
+ ["google" /* Google */]: this.config.googleApiKey,
608
+ ["anthropic" /* Anthropic */]: this.config.anthropicApiKey
609
+ };
610
+ return createProvider({
611
+ type: override.provider,
612
+ model: override.model,
613
+ apiKey: apiKeyFor[override.provider],
614
+ maxRetries: this.config.maxRetries
615
+ });
616
+ }
617
+ return createProvider({
618
+ type: defaultType,
619
+ model: defaultModel,
620
+ apiKey: defaultApiKey,
621
+ maxRetries: this.config.maxRetries
622
+ });
623
+ }
424
624
  /**
425
625
  * Send telemetry event to analytics service
426
626
  * Common helper for all evaluators
@@ -441,123 +641,12 @@ var BaseEvaluator = class {
441
641
  provider: params.provider,
442
642
  token_usage: params.tokenUsage,
443
643
  metadata: params.metadata,
644
+ model_override: this.config.modelOverride ? true : void 0,
444
645
  // Include input text only if recording is enabled
445
646
  input_text: this.config.telemetry.recordInputs ? params.inputText : void 0
446
647
  });
447
648
  }
448
649
  };
449
- var DEFAULT_MODELS = {
450
- openai: "gpt-4o",
451
- anthropic: "claude-sonnet-4-5-20250929",
452
- google: "gemini-2.5-pro"
453
- };
454
- var VercelAIProvider = class {
455
- constructor(config) {
456
- this.config = config;
457
- if (config.type === "custom") {
458
- throw new Error(
459
- "VercelAIProvider does not support custom type. Use config.customProvider directly."
460
- );
461
- }
462
- }
463
- /**
464
- * Generate structured output using Vercel AI SDK's generateText with output
465
- */
466
- async generateStructured(request) {
467
- const model = await this.getModel(request.model);
468
- const startTime = Date.now();
469
- const { output, usage } = await generateText({
470
- model,
471
- messages: request.messages,
472
- output: Output.object({ schema: request.schema }),
473
- temperature: request.temperature ?? 0,
474
- maxRetries: this.config.maxRetries ?? 0,
475
- ...request.maxTokens !== void 0 ? { maxTokens: request.maxTokens } : {}
476
- });
477
- return {
478
- data: output,
479
- model: request.model || this.getDefaultModel(),
480
- usage: {
481
- inputTokens: usage.inputTokens || 0,
482
- outputTokens: usage.outputTokens || 0
483
- },
484
- latencyMs: Date.now() - startTime
485
- };
486
- }
487
- /**
488
- * Generate plain text using Vercel AI SDK's generateText
489
- */
490
- async generateText(messages, temperature) {
491
- const model = await this.getModel();
492
- const startTime = Date.now();
493
- const { text, usage } = await generateText({
494
- model,
495
- messages,
496
- temperature: temperature ?? this.config.temperature ?? 0,
497
- maxRetries: this.config.maxRetries ?? 0
498
- });
499
- return {
500
- text,
501
- usage: {
502
- inputTokens: usage.inputTokens || 0,
503
- outputTokens: usage.outputTokens || 0
504
- },
505
- latencyMs: Date.now() - startTime
506
- };
507
- }
508
- /**
509
- * Get the configured language model.
510
- * Uses dynamic imports so consumers only need to install the provider packages they use.
511
- */
512
- async getModel(requestModel) {
513
- const modelId = requestModel || this.config.model || this.getDefaultModel();
514
- const apiKey = this.config.apiKey;
515
- switch (this.config.type) {
516
- case "openai": {
517
- const { createOpenAI } = await import('@ai-sdk/openai').catch(() => {
518
- throw new Error(
519
- "To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai"
520
- );
521
- });
522
- return createOpenAI(apiKey ? { apiKey } : {})(modelId);
523
- }
524
- case "anthropic": {
525
- const { createAnthropic } = await import('@ai-sdk/anthropic').catch(() => {
526
- throw new Error(
527
- "To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic"
528
- );
529
- });
530
- return createAnthropic(apiKey ? { apiKey } : {})(modelId);
531
- }
532
- case "google": {
533
- const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch(() => {
534
- throw new Error(
535
- "To use the Google provider, install its adapter: npm install @ai-sdk/google"
536
- );
537
- });
538
- return createGoogleGenerativeAI(apiKey ? { apiKey } : {})(modelId);
539
- }
540
- default:
541
- throw new Error(`Unsupported provider type: ${this.config.type}`);
542
- }
543
- }
544
- /**
545
- * Get default model for the configured provider
546
- */
547
- getDefaultModel() {
548
- const providerType = this.config.type;
549
- if (providerType === "custom") {
550
- throw new Error("Cannot get default model for custom provider type");
551
- }
552
- return DEFAULT_MODELS[providerType];
553
- }
554
- };
555
- function createProvider(config) {
556
- if (config.type === "custom" && config.customProvider) {
557
- return config.customProvider;
558
- }
559
- return new VercelAIProvider(config);
560
- }
561
650
  var TextComplexityLevel = z.enum([
562
651
  "Slightly complex",
563
652
  "Moderately complex",
@@ -759,6 +848,44 @@ function featuresToJSON(features, decimals = 1, castToInt = true) {
759
848
  }
760
849
  return JSON.stringify(payload, null, 2);
761
850
  }
851
/**
 * Adapters for the libraries a preprocessing step may call, keyed by library
 * name. Each adapter exposes `call(fnName, text)`, which invokes the named
 * library function on `text` and returns its result.
 */
var LIBRARY_ADAPTERS = {
  "text-readability": {
    /**
     * @param fnName - Name of a function on the text-readability export.
     * @param text - Text passed as the single argument.
     * @throws {Error} If `fnName` does not name a text-readability function.
     */
    call(fnName, text) {
      // Names every object inherits (toString, constructor, ...) are functions
      // too, but they are not library metrics — treat them as "not found".
      const isInheritedObjectMember = Object.hasOwn(Object.prototype, fnName);
      const fn = isInheritedObjectMember ? void 0 : textReadability[fnName];
      if (typeof fn !== "function") {
        throw new Error(`Function "${fnName}" not found in text-readability.`);
      }
      return fn.call(textReadability, text);
    }
  }
};
862
/**
 * Transforms that can be applied to a preprocessing result, keyed by the
 * `post_transform.type` value that selects them.
 */
var POST_TRANSFORMS = {
  /**
   * Round `value` to `precision` decimal places using Math.round.
   *
   * @param value - Number to round.
   * @param options - Optional; `precision` defaults to 0. The options object
   *   itself may be omitted.
   * @returns The rounded number.
   */
  round(value, { precision = 0 } = {}) {
    const factor = 10 ** precision;
    return Math.round(value * factor) / factor;
  }
};
868
/**
 * Run one preprocessing step against `text`.
 *
 * @param text - Input text handed to the library function.
 * @param impl - Step description. Reads `library` (key into LIBRARY_ADAPTERS),
 *   `function` (name passed to the adapter) and the optional `post_transform`
 *   (`type` selects an entry of POST_TRANSFORMS; the whole object is passed to
 *   it as options).
 * @returns The adapter result, post-transformed when `post_transform` is set.
 * @throws {Error} If the library or the post_transform type is not supported.
 */
function runPreprocessingStep(text, impl) {
  // Own-property lookups only: inherited names such as "constructor" or
  // "toString" must be reported as unsupported, not resolved via the prototype.
  const ownEntry = (table, key) => (Object.hasOwn(table, key) ? table[key] : void 0);

  const adapter = ownEntry(LIBRARY_ADAPTERS, impl.library);
  if (!adapter) {
    const supported = Object.keys(LIBRARY_ADAPTERS).join(", ");
    throw new Error(
      `Unsupported preprocessing library "${impl.library}". Supported: ${supported}.`
    );
  }
  const raw = adapter.call(impl.function, text);

  const postTransform = impl.post_transform;
  if (!postTransform) {
    return raw;
  }
  const transform = ownEntry(POST_TRANSFORMS, postTransform.type);
  if (!transform) {
    const supported = Object.keys(POST_TRANSFORMS).join(", ");
    throw new Error(
      `Unsupported post_transform type "${postTransform.type}". Supported: ${supported}.`
    );
  }
  return transform(raw, postTransform);
}
762
889
 
763
890
  // ../../evals/prompts/vocabulary/background-knowledge.txt
764
891
  var background_knowledge_default = `
@@ -1064,32 +1191,28 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1064
1191
  name: "Vocabulary",
1065
1192
  description: "Evaluates vocabulary complexity of educational texts relative to grade level",
1066
1193
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
1067
- requiresGoogleKey: true,
1068
- requiresOpenAIKey: true
1194
+ defaultProviders: ["google" /* Google */, "openai" /* OpenAI */]
1069
1195
  };
1070
1196
  grades34ComplexityProvider;
1071
1197
  otherGradesComplexityProvider;
1072
1198
  backgroundKnowledgeProvider;
1073
1199
  constructor(config) {
1074
1200
  super(config);
1075
- this.grades34ComplexityProvider = createProvider({
1076
- type: "google",
1077
- model: "gemini-2.5-pro",
1078
- apiKey: config.googleApiKey,
1079
- maxRetries: this.config.maxRetries
1080
- });
1081
- this.otherGradesComplexityProvider = createProvider({
1082
- type: "openai",
1083
- model: "gpt-4.1-2025-04-14",
1084
- apiKey: config.openaiApiKey,
1085
- maxRetries: this.config.maxRetries
1086
- });
1087
- this.backgroundKnowledgeProvider = createProvider({
1088
- type: "openai",
1089
- model: "gpt-4o-2024-11-20",
1090
- apiKey: config.openaiApiKey,
1091
- maxRetries: this.config.maxRetries
1092
- });
1201
+ this.grades34ComplexityProvider = this.createConfiguredProvider(
1202
+ "google" /* Google */,
1203
+ "gemini-2.5-pro",
1204
+ config.googleApiKey
1205
+ );
1206
+ this.otherGradesComplexityProvider = this.createConfiguredProvider(
1207
+ "openai" /* OpenAI */,
1208
+ "gpt-4.1-2025-04-14",
1209
+ config.openaiApiKey
1210
+ );
1211
+ this.backgroundKnowledgeProvider = this.createConfiguredProvider(
1212
+ "openai" /* OpenAI */,
1213
+ "gpt-4o-2024-11-20",
1214
+ config.openaiApiKey
1215
+ );
1093
1216
  }
1094
1217
  /**
1095
1218
  * Evaluate vocabulary complexity for a given text and grade level
@@ -1098,6 +1221,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1098
1221
  * @param grade - The target grade level (3-12)
1099
1222
  * @returns Evaluation result with complexity score and detailed analysis
1100
1223
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
1224
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1101
1225
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1102
1226
  */
1103
1227
  async evaluate(text, grade) {
@@ -1109,7 +1233,9 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1109
1233
  });
1110
1234
  const startTime = Date.now();
1111
1235
  const stageDetails = [];
1112
- const complexityProviderName = grade === "3" || grade === "4" ? "google:gemini-2.5-pro" : "openai:gpt-4.1-2025-04-14";
1236
+ const complexityProviderLabel = grade === "3" || grade === "4" ? this.grades34ComplexityProvider.label : this.otherGradesComplexityProvider.label;
1237
+ const backgroundProviderLabel = this.backgroundKnowledgeProvider.label;
1238
+ const modelLabel = this.config.modelOverride ? backgroundProviderLabel : `${backgroundProviderLabel}+${complexityProviderLabel}`;
1113
1239
  try {
1114
1240
  this.validateText(text);
1115
1241
  this.validateGrade(grade, new Set(_VocabularyEvaluator.metadata.supportedGrades));
@@ -1120,7 +1246,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1120
1246
  const bgResponse = await this.getBackgroundKnowledgeAssumption(text, grade);
1121
1247
  stageDetails.push({
1122
1248
  stage: "background_knowledge",
1123
- provider: "openai:gpt-4o-2024-11-20",
1249
+ provider: backgroundProviderLabel,
1124
1250
  latency_ms: bgResponse.latencyMs,
1125
1251
  token_usage: {
1126
1252
  input_tokens: bgResponse.usage.inputTokens,
@@ -1136,7 +1262,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1136
1262
  );
1137
1263
  stageDetails.push({
1138
1264
  stage: "complexity_evaluation",
1139
- provider: complexityProviderName,
1265
+ provider: complexityProviderLabel,
1140
1266
  latency_ms: complexityResponse.latencyMs,
1141
1267
  token_usage: {
1142
1268
  input_tokens: complexityResponse.usage.inputTokens,
@@ -1152,7 +1278,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1152
1278
  score: complexityResponse.data.complexity_score,
1153
1279
  reasoning: complexityResponse.data.reasoning,
1154
1280
  metadata: {
1155
- model: `openai:gpt-4o-2024-11-20 + ${complexityProviderName}`,
1281
+ model: modelLabel,
1156
1282
  processingTimeMs: latencyMs
1157
1283
  },
1158
1284
  _internal: complexityResponse.data
@@ -1162,7 +1288,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1162
1288
  latencyMs,
1163
1289
  textLength: text.length,
1164
1290
  grade,
1165
- provider: `openai:gpt-4o-2024-11-20 + ${complexityProviderName}`,
1291
+ provider: modelLabel,
1166
1292
  tokenUsage: totalTokenUsage,
1167
1293
  metadata: {
1168
1294
  stage_details: stageDetails
@@ -1197,7 +1323,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1197
1323
  latencyMs,
1198
1324
  textLength: text.length,
1199
1325
  grade,
1200
- provider: `openai:gpt-4o-2024-11-20 + ${complexityProviderName}`,
1326
+ provider: modelLabel,
1201
1327
  tokenUsage: totalTokenUsage,
1202
1328
  errorCode: error instanceof Error ? error.name : "UnknownError",
1203
1329
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -1415,25 +1541,12 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1415
1541
  name: "Sentence Structure",
1416
1542
  description: "Evaluates sentence structure complexity based on grammatical features",
1417
1543
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
1418
- requiresGoogleKey: false,
1419
- requiresOpenAIKey: true
1544
+ defaultProviders: ["openai" /* OpenAI */]
1420
1545
  };
1421
- analysisProvider;
1422
- complexityProvider;
1546
+ provider;
1423
1547
  constructor(config) {
1424
1548
  super(config);
1425
- this.analysisProvider = createProvider({
1426
- type: "openai",
1427
- model: "gpt-4o",
1428
- apiKey: config.openaiApiKey,
1429
- maxRetries: this.config.maxRetries
1430
- });
1431
- this.complexityProvider = createProvider({
1432
- type: "openai",
1433
- model: "gpt-4o",
1434
- apiKey: config.openaiApiKey,
1435
- maxRetries: this.config.maxRetries
1436
- });
1549
+ this.provider = this.createConfiguredProvider("openai" /* OpenAI */, "gpt-4o", config.openaiApiKey);
1437
1550
  }
1438
1551
  /**
1439
1552
  * Evaluate sentence structure complexity for a given text and grade level
@@ -1442,6 +1555,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1442
1555
  * @param grade - The target grade level (3-12)
1443
1556
  * @returns Evaluation result with complexity score and detailed analysis
1444
1557
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
1558
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1445
1559
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1446
1560
  */
1447
1561
  async evaluate(text, grade) {
@@ -1463,7 +1577,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1463
1577
  const analysisResponse = await this.analyzeSentenceStructure(text);
1464
1578
  stageDetails.push({
1465
1579
  stage: "sentence_analysis",
1466
- provider: "openai:gpt-4o",
1580
+ provider: this.provider.label,
1467
1581
  latency_ms: analysisResponse.latencyMs,
1468
1582
  token_usage: {
1469
1583
  input_tokens: analysisResponse.usage.inputTokens,
@@ -1478,7 +1592,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1478
1592
  const complexityResponse = await this.classifyComplexity(features, grade, text);
1479
1593
  stageDetails.push({
1480
1594
  stage: "complexity_classification",
1481
- provider: "openai:gpt-4o",
1595
+ provider: this.provider.label,
1482
1596
  latency_ms: complexityResponse.latencyMs,
1483
1597
  token_usage: {
1484
1598
  input_tokens: complexityResponse.usage.inputTokens,
@@ -1494,7 +1608,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1494
1608
  score: complexityResponse.data.answer,
1495
1609
  reasoning: complexityResponse.data.reasoning,
1496
1610
  metadata: {
1497
- model: "openai:gpt-4o",
1611
+ model: this.provider.label,
1498
1612
  processingTimeMs: latencyMs
1499
1613
  },
1500
1614
  _internal: {
@@ -1508,7 +1622,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1508
1622
  latencyMs,
1509
1623
  textLength: text.length,
1510
1624
  grade,
1511
- provider: "openai:gpt-4o",
1625
+ provider: this.provider.label,
1512
1626
  tokenUsage: totalTokenUsage,
1513
1627
  metadata: {
1514
1628
  stage_details: stageDetails
@@ -1543,7 +1657,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1543
1657
  latencyMs,
1544
1658
  textLength: text.length,
1545
1659
  grade,
1546
- provider: "openai:gpt-4o",
1660
+ provider: this.provider.label,
1547
1661
  tokenUsage: totalTokenUsage,
1548
1662
  errorCode: error instanceof Error ? error.name : "UnknownError",
1549
1663
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -1571,7 +1685,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1571
1685
  `flesch_kincaid_grade: ${metrics.fleschKincaidGrade}`
1572
1686
  ].join("\n");
1573
1687
  const userPrompt = getUserPromptAnalysis(text, gtCountsStr);
1574
- const response = await this.analysisProvider.generateStructured({
1688
+ const response = await this.provider.generateStructured({
1575
1689
  messages: [
1576
1690
  { role: "system", content: getSystemPromptAnalysis() },
1577
1691
  { role: "user", content: userPrompt }
@@ -1593,7 +1707,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1593
1707
  async classifyComplexity(features, grade, excerpt) {
1594
1708
  const featuresJSON = featuresToJSON(features, 1, true);
1595
1709
  const userPrompt = getUserPromptComplexity(featuresJSON, grade, excerpt);
1596
- const response = await this.complexityProvider.generateStructured({
1710
+ const response = await this.provider.generateStructured({
1597
1711
  messages: [
1598
1712
  { role: "system", content: getSystemPromptComplexity() },
1599
1713
  { role: "user", content: userPrompt }
@@ -1649,18 +1763,16 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1649
1763
  description: "Determines appropriate grade level for text with scaffolding recommendations",
1650
1764
  supportedGrades: [],
1651
1765
  // No grade parameter required - evaluates what grade the text is appropriate for
1652
- requiresGoogleKey: true,
1653
- requiresOpenAIKey: false
1766
+ defaultProviders: ["google" /* Google */]
1654
1767
  };
1655
1768
  provider;
1656
1769
  constructor(config) {
1657
1770
  super(config);
1658
- this.provider = createProvider({
1659
- type: "google",
1660
- model: "gemini-2.5-pro",
1661
- apiKey: config.googleApiKey,
1662
- maxRetries: this.config.maxRetries
1663
- });
1771
+ this.provider = this.createConfiguredProvider(
1772
+ "google" /* Google */,
1773
+ "gemini-2.5-pro",
1774
+ config.googleApiKey
1775
+ );
1664
1776
  }
1665
1777
  /**
1666
1778
  * Evaluate grade level appropriateness for a given text
@@ -1668,6 +1780,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1668
1780
  * @param text - The text to evaluate
1669
1781
  * @returns Evaluation result with grade recommendations and scaffolding suggestions
1670
1782
  * @throws {ValidationError} If text is empty or too short/long
1783
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1671
1784
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1672
1785
  */
1673
1786
  async evaluate(text) {
@@ -1701,7 +1814,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1701
1814
  score: response.data.grade,
1702
1815
  reasoning: response.data.reasoning,
1703
1816
  metadata: {
1704
- model: "google:gemini-2.5-pro",
1817
+ model: this.provider.label,
1705
1818
  processingTimeMs: latencyMs
1706
1819
  },
1707
1820
  _internal: response.data
@@ -1710,7 +1823,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1710
1823
  status: "success",
1711
1824
  latencyMs,
1712
1825
  textLength: text.length,
1713
- provider: "google:gemini-2.5-pro",
1826
+ provider: this.provider.label,
1714
1827
  tokenUsage,
1715
1828
  // No metadata.stage_details for single-stage evaluator
1716
1829
  inputText: text
@@ -1735,7 +1848,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1735
1848
  status: "error",
1736
1849
  latencyMs,
1737
1850
  textLength: text.length,
1738
- provider: "google:gemini-2.5-pro",
1851
+ provider: this.provider.label,
1739
1852
  errorCode: error instanceof Error ? error.name : "UnknownError",
1740
1853
  inputText: text
1741
1854
  }).catch(() => {
@@ -1846,18 +1959,16 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1846
1959
  name: "Subject Matter Knowledge",
1847
1960
  description: "Evaluates background knowledge demands of educational texts relative to grade level",
1848
1961
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
1849
- requiresGoogleKey: true,
1850
- requiresOpenAIKey: false
1962
+ defaultProviders: ["google" /* Google */]
1851
1963
  };
1852
1964
  provider;
1853
1965
  constructor(config) {
1854
1966
  super(config);
1855
- this.provider = createProvider({
1856
- type: "google",
1857
- model: "gemini-3-flash-preview",
1858
- apiKey: config.googleApiKey,
1859
- maxRetries: this.config.maxRetries
1860
- });
1967
+ this.provider = this.createConfiguredProvider(
1968
+ "google" /* Google */,
1969
+ "gemini-3-flash-preview",
1970
+ config.googleApiKey
1971
+ );
1861
1972
  }
1862
1973
  /**
1863
1974
  * Evaluate subject matter knowledge complexity for a given text and grade level
@@ -1866,6 +1977,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1866
1977
  * @param grade - The target grade level (3-12)
1867
1978
  * @returns Evaluation result with complexity score and detailed analysis
1868
1979
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
1980
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1869
1981
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1870
1982
  */
1871
1983
  async evaluate(text, grade) {
@@ -1888,7 +2000,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1888
2000
  const response = await this.evaluateSmk(text, grade, fkScore);
1889
2001
  stageDetails.push({
1890
2002
  stage: "smk_evaluation",
1891
- provider: "google:gemini-3-flash-preview",
2003
+ provider: this.provider.label,
1892
2004
  latency_ms: response.latencyMs,
1893
2005
  token_usage: {
1894
2006
  input_tokens: response.usage.inputTokens,
@@ -1904,7 +2016,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1904
2016
  score: response.data.complexity_score,
1905
2017
  reasoning: response.data.reasoning,
1906
2018
  metadata: {
1907
- model: "google:gemini-3-flash-preview",
2019
+ model: this.provider.label,
1908
2020
  processingTimeMs: latencyMs
1909
2021
  },
1910
2022
  _internal: response.data
@@ -1914,7 +2026,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1914
2026
  latencyMs,
1915
2027
  textLength: text.length,
1916
2028
  grade,
1917
- provider: "google:gemini-3-flash-preview",
2029
+ provider: this.provider.label,
1918
2030
  tokenUsage: totalTokenUsage,
1919
2031
  metadata: {
1920
2032
  stage_details: stageDetails
@@ -1949,7 +2061,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1949
2061
  latencyMs,
1950
2062
  textLength: text.length,
1951
2063
  grade,
1952
- provider: "google:gemini-3-flash-preview",
2064
+ provider: this.provider.label,
1953
2065
  tokenUsage: totalTokenUsage,
1954
2066
  errorCode: error instanceof Error ? error.name : "UnknownError",
1955
2067
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -2053,18 +2165,16 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2053
2165
  name: "Conventionality",
2054
2166
  description: "Evaluates how explicit, literal, and straightforward a text's meaning is relative to grade level",
2055
2167
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
2056
- requiresGoogleKey: true,
2057
- requiresOpenAIKey: false
2168
+ defaultProviders: ["google" /* Google */]
2058
2169
  };
2059
2170
  provider;
2060
2171
  constructor(config) {
2061
2172
  super(config);
2062
- this.provider = createProvider({
2063
- type: "google",
2064
- model: "gemini-3-flash-preview",
2065
- apiKey: config.googleApiKey,
2066
- maxRetries: this.config.maxRetries
2067
- });
2173
+ this.provider = this.createConfiguredProvider(
2174
+ "google" /* Google */,
2175
+ "gemini-3-flash-preview",
2176
+ config.googleApiKey
2177
+ );
2068
2178
  }
2069
2179
  /**
2070
2180
  * Evaluate conventionality complexity for a given text and grade level
@@ -2073,6 +2183,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2073
2183
  * @param grade - The target grade level (3-12)
2074
2184
  * @returns Evaluation result with complexity score and detailed analysis
2075
2185
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
2186
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
2076
2187
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
2077
2188
  */
2078
2189
  async evaluate(text, grade) {
@@ -2095,7 +2206,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2095
2206
  const response = await this.evaluateConventionality(text, grade, fkScore);
2096
2207
  stageDetails.push({
2097
2208
  stage: "conventionality_evaluation",
2098
- provider: "google:gemini-3-flash-preview",
2209
+ provider: this.provider.label,
2099
2210
  latency_ms: response.latencyMs,
2100
2211
  token_usage: {
2101
2212
  input_tokens: response.usage.inputTokens,
@@ -2111,7 +2222,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2111
2222
  score: response.data.complexity_score,
2112
2223
  reasoning: response.data.reasoning,
2113
2224
  metadata: {
2114
- model: "google:gemini-3-flash-preview",
2225
+ model: this.provider.label,
2115
2226
  processingTimeMs: latencyMs
2116
2227
  },
2117
2228
  _internal: response.data
@@ -2121,7 +2232,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2121
2232
  latencyMs,
2122
2233
  textLength: text.length,
2123
2234
  grade,
2124
- provider: "google:gemini-3-flash-preview",
2235
+ provider: this.provider.label,
2125
2236
  tokenUsage: totalTokenUsage,
2126
2237
  metadata: {
2127
2238
  stage_details: stageDetails
@@ -2156,7 +2267,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2156
2267
  latencyMs,
2157
2268
  textLength: text.length,
2158
2269
  grade,
2159
- provider: "google:gemini-3-flash-preview",
2270
+ provider: this.provider.label,
2160
2271
  tokenUsage: totalTokenUsage,
2161
2272
  errorCode: error instanceof Error ? error.name : "UnknownError",
2162
2273
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -2188,6 +2299,276 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2188
2299
  };
2189
2300
  }
2190
2301
  };
2302
// Structured-output contract for the Purpose evaluator's LLM response.
// Every object level is strict, so keys outside this shape are rejected.
var PurposeOutputSchema = z.object({
  complexity_score: z
    .enum([
      "slightly_complex",
      "moderately_complex",
      "very_complex",
      "exceedingly_complex",
      "more_context_needed"
    ])
    .describe("The Purpose complexity level for the target grade."),
  reasoning: z
    .string()
    .describe("A high-level summary of why the text is at this complexity level for the target grade."),
  details: z
    .object({
      detailed_summary: z
        .array(
          z.object({
            factor: z.string().describe("The specific text complexity factor identified."),
            description: z.string().describe("How this factor manifests in the text."),
            effect_on_complexity_dimension: z
              .string()
              .describe("How this factor affects the reader's ability to understand the text's specific complexity dimension.")
          }).strict()
        )
        .describe("Individual complexity factors with descriptions and their effects."),
      adjustment_and_scaffolding: z
        .array(
          z.object({
            scaffolding_need: z.string().describe("The complexity factor that requires scaffolding."),
            suggestion: z
              .string()
              .describe("A specific instructional strategy to support students with this factor.")
          }).strict()
        )
        .describe("Scaffolding strategies to make the text accessible at the target grade."),
      recommended_use_cases: z
        .array(
          z.object({
            opportunity: z.string().describe("An instructional opportunity related to the text."),
            suggestion: z
              .string()
              .describe("A specific way to leverage this text for that instructional purpose.")
          }).strict()
        )
        .describe("Additional instructional opportunities for using this text.")
    })
    .strict()
    .describe("Practical instructional details including scaffolding strategies and recommended use cases.")
}).strict();
2303
+
2304
+ // ../../evals/prompts/purpose/system.txt
2305
+ var system_default4 = '\n Role\n You are an expert reading assessment evaluator. Your task is to determine the Text Complexity of a given passage based exclusively on the Purpose dimension of the qualitative measures rubric.\n\n Task Details\n You will be provided with an informational or literary `text`, along with its `grade_level` and `fk_score` (Flesch-Kincaid). You must analyze the text and determine how difficult it is for a reader to identify the author\'s purpose. \n\n Crucially, you must distinguish between the text\'s *topic* (what it is about) and its *purpose* (why the author wrote it). \n\n Rubric: Purpose Complexity\n Exceedingly Complex: Subtle and intricate, difficult to determine; includes many theoretical or abstract elements.\n Very Complex: Implicit or subtle but fairly easy to infer; more theoretical or abstract than concrete.\n Moderately Complex: Implied but easy to identify based upon context or source.\n Slightly Complex: Explicitly stated, clear, concrete, narrowly focused.\n More Context Needed: The text is a fragment or lacks necessary introductory context, making the true purpose impossible to determine accurately without external background knowledge.\n\n Expert Rules for Evaluating Purpose\n Based on expert consensus and historical grading corrections, you must apply the following heuristics:\n\n 1. The "Slightly Complex" Benchmark (Straightforward and Explicit)\n A text is Slightly Complex if its purpose is explicitly stated or if its informative intent is straightforward, clear, concrete, and directly answers what the text is immediately about. If the text opens by clearly identifying a concrete topic (e.g., "Pins are made of either brass or iron wire") and rigidly follows through by explaining factual, practical information or a process (like manufacturing steps or geographic facts), the purpose is considered explicit and straightforward. It does *not* require a literal statement like "The purpose of this text is to..." 
as long as the delivery of information is direct, clear, and unadorned by persuasive elements or complex framing.\n\n 2. Moderately Complex via Guiding Questions & Inquiry Formats\n If a text begins with a general introduction and uses guiding questions (e.g., "Have you ever wondered how clouds are formed?") to transition into an explanation, the purpose is implied rather than explicitly stated upfront. Because the reader must recognize the question as the pivot point for the author\'s intent, it is Moderately Complex.\n\n 3. Moderately Complex via Multiple Distinct Informational Goals\n If a text covers a broad topic but jumps between several distinct scientific or informational objectives without an overarching framing device or explicit thesis (e.g., talking about measuring ice sheets, then mapping, then finding meteorites), the reader must synthesize these diverse facts to recognize the broader purpose, making it Moderately Complex.\n\n 4. Moderately Complex via Arguments Disguised as Information\n If an author is arguing a specific point, correcting a misconception, or defending a stance, but the text could initially be mistaken by students as purely informative factual text, it is Moderately Complex. The reader must infer the persuasive intent or argumentative purpose beneath the informative tone.\n\n 5. "More Context Needed" for Fragments\n If a text is a fragment missing a crucial introduction or context, and identifying the author\'s purpose beyond a simple surface-level description would be exceptionally difficult for a reader in the target grade level without that external background, score it as `more_context_needed`. 
\n\n Output Format\n Provide your evaluation in the following structure:\n reasoning:\n - Surface Analysis: Identify if the text clearly identifies its topic and delivers straightforward facts, or if it utilizes structural cues, titles, or direct thesis statements.\n - Subtlety & Framing: Is the informative purpose straightforward and concrete? Does it use guiding questions? Is it an argument disguised as pure information? Are there multiple distinct informational goals requiring synthesis?\n - Context Check: Is this text a fragment missing crucial context that obscures the deeper purpose for the target grade level?\n - Rubric Alignment: Explain how the text aligns with the specific language of the rubric, explicitly referencing the expert rules above. Justify why it isn\'t one level simpler or more complex.\n\n answer:\n - complexity_score: (slightly_complex, moderately_complex, very_complex, exceedingly_complex, more_context_needed)\n - reasoning: A brief summary of your final decision.\n - details: Structured breakdown of PurposeDetails including detailed_summary, adjustment_and_scaffolding, and recommended_use_cases.\n';
2306
+
2307
+ // ../../evals/prompts/purpose/user.txt
2308
+ var user_default4 = "Analyze:\nText: {text}\nGrade: {grade_level}\nFK Score: {fk_score}";
2309
+
2310
+ // ../../evals/prompts/purpose/config.json
2311
// Inlined evaluator configuration for the Purpose dimension: identity metadata,
// the Flesch-Kincaid preprocessing step, and the single LLM step definition.
var config_default = {
  evaluator: {
    id: "literacy.gla.purpose",
    name: "Purpose Dimension Text Complexity Evaluator",
    description: "Evaluates the Purpose dimension of qualitative text complexity for K-12 reading assessment, producing a 5-level rubric rating with structured pedagogical detail."
  },
  preprocessing: [
    {
      id: "fk_score",
      kind: "flesch_kincaid_grade",
      description: "Compute the Flesch-Kincaid Grade Level for the input text and bind it to {fk_score} in the prompt.",
      input: "text",
      output: "fk_score",
      // One implementation descriptor per runtime language.
      implementation: {
        python: {
          library: "textstat",
          function: "flesch_kincaid_grade",
          post_transform: { type: "round", precision: 2 }
        },
        typescript: {
          library: "text-readability",
          function: "fleschKincaidGrade",
          post_transform: { type: "round", precision: 2 }
        }
      }
    }
  ],
  steps: [
    {
      id: "evaluate_purpose",
      description: "Single-call LLM step that produces the EvaluatorOutput JSON.",
      prompt: {
        type: "chat",
        // Each message records the prompt file it came from and that file's sha256.
        messages: [
          {
            role: "system",
            source_path: "system.txt",
            sha256: "745b95b7d54dc845b99363c9d3360355381883c22a5f6a0f305d7349cae38a54"
          },
          {
            role: "user",
            source_path: "user.txt",
            sha256: "cd8e6347db1a55d104e34436f8f66e833bd6583645d4786a554aaefdd26479b2"
          }
        ],
        placeholders: {
          text: { required: true, source: "input" },
          grade_level: { required: true, source: "input" },
          fk_score: { required: true, source: "preprocessing.fk_score" }
        }
      },
      model: { provider: "google", name: "gemini-3-flash-preview" },
      generation: { temperature: 0 },
      parser: { kind: "structured_output" },
      output_binding: "formatted_output"
    }
  ]
};
2390
+
2391
+ // src/prompts/purpose/index.ts
2392
// The step id is "evaluate_" plus the last dot-separated segment of the evaluator id.
var STEP_ID = `evaluate_${config_default.evaluator.id.split(".").slice(-1)[0]}`;
var _step = config_default.steps.find((candidate) => candidate.id === STEP_ID);
if (!_step) {
  // Fail at module load rather than later, when a prompt is first rendered.
  throw new Error(`Step "${STEP_ID}" not found in purpose config.json`);
}
// Names of the `{placeholder}` tokens the step's prompt declares.
var PLACEHOLDER_KEYS = Object.keys(_step.prompt.placeholders);
2396
/**
 * Fill `{placeholder}` tokens in a prompt template.
 *
 * Only keys listed in PLACEHOLDER_KEYS and present in `inputs` are substituted;
 * any other `{...}` text is left untouched.
 *
 * Substitution is done in a single pass with a function replacer, so that:
 *  - `$&`, `$$`, `$'` and similar sequences inside an input value are inserted
 *    literally instead of being interpreted as replacement patterns, and
 *  - placeholder-looking text inside an input value (e.g. a passage that
 *    contains "{grade_level}") is never expanded by a later key.
 *
 * @param template - Prompt text containing `{key}` tokens.
 * @param inputs - Map from placeholder key to the value to insert.
 * @returns The template with every known, supplied placeholder replaced.
 */
function applyPlaceholders(template, inputs) {
  const suppliedKeys = PLACEHOLDER_KEYS.filter((key) => key in inputs);
  if (suppliedKeys.length === 0) return template;
  // Escape regex metacharacters so a key is always matched literally.
  const alternation = suppliedKeys
    .map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
    .join("|");
  const tokenPattern = new RegExp(`\\{(${alternation})\\}`, "g");
  return template.replace(tokenPattern, (_token, key) => String(inputs[key]));
}
2402
/**
 * Render the Purpose system prompt.
 *
 * @param inputs - Placeholder values passed through to applyPlaceholders.
 * @returns The system prompt template after placeholder substitution.
 */
function getSystemPrompt5(inputs) {
  const renderedSystemPrompt = applyPlaceholders(system_default4, inputs);
  return renderedSystemPrompt;
}
2405
/**
 * Render the Purpose user prompt.
 *
 * @param inputs - Placeholder values passed through to applyPlaceholders.
 * @returns The user prompt template after placeholder substitution.
 */
function getUserPrompt5(inputs) {
  const renderedUserPrompt = applyPlaceholders(user_default4, inputs);
  return renderedUserPrompt;
}
2408
+
2409
+ // ../../evals/prompts/purpose/input_schema.json
2410
// Inlined subset of the purpose input schema: only the grade_level bounds.
var input_schema_default = {
  properties: {
    grade_level: { minimum: 3, maximum: 12 }
  }
};
2417
+
2418
+ // src/evaluators/purpose.ts
2419
// Locate the LLM step for this evaluator; its id is "evaluate_" plus the last
// dot-separated segment of the evaluator id.
var STEP_ID2 = `evaluate_${config_default.evaluator.id.split(".").slice(-1)[0]}`;
var _step2 = config_default.steps.find((candidate) => candidate.id === STEP_ID2);
if (!_step2) {
  throw new Error(`Step "${STEP_ID2}" not found in purpose config.json`);
}
var STEP = _step2;

// Inclusive grade bounds come from the input schema; SUPPORTED_GRADES lists
// every integer grade in that range as a string.
var GRADE_MIN = input_schema_default.properties.grade_level.minimum;
var GRADE_MAX = input_schema_default.properties.grade_level.maximum;
var SUPPORTED_GRADES = [];
for (let gradeValue = GRADE_MIN; gradeValue <= GRADE_MAX; gradeValue += 1) {
  SUPPORTED_GRADES.push(String(gradeValue));
}

// Maps the schema's snake_case complexity_score values to display labels.
var COMPLEXITY_SCORE_DISPLAY = {
  slightly_complex: "Slightly complex",
  moderately_complex: "Moderately complex",
  very_complex: "Very complex",
  exceedingly_complex: "Exceedingly complex",
  more_context_needed: "More context needed"
};
2433
/**
 * Evaluator for the Purpose dimension of qualitative text complexity.
 *
 * Runs one structured LLM call (the step described by STEP) and returns a
 * display-form complexity score plus the model's reasoning.
 */
var PurposeEvaluator = class _PurposeEvaluator extends BaseEvaluator {
  static metadata = {
    id: config_default.evaluator.id,
    name: config_default.evaluator.name,
    description: config_default.evaluator.description,
    supportedGrades: SUPPORTED_GRADES,
    defaultProviders: ["google" /* Google */]
  };
  static TEMPERATURE = STEP.generation.temperature;
  /**
   * Compute the `fk_score` prompt input for a text by running the config's
   * "fk_score" preprocessing step with its typescript implementation descriptor.
   *
   * @param text - The text to score
   * @returns Whatever runPreprocessingStep returns for that descriptor
   * @throws {Error} If the config has no "fk_score" preprocessing step
   */
  static computeFkScore(text) {
    const fkStep = config_default.preprocessing.find((p) => p.id === "fk_score");
    if (!fkStep) throw new Error("fk_score preprocessing step not found in purpose config.json");
    return runPreprocessingStep(text, fkStep.implementation.typescript);
  }
  provider;
  constructor(config) {
    super(config);
    this.provider = this.createConfiguredProvider(
      "google" /* Google */,
      STEP.model.name,
      config.googleApiKey
    );
  }
  /**
   * Evaluate purpose complexity for a given text and grade level
   *
   * @param text - The text to evaluate
   * @param grade - The target grade level (3-12)
   * @returns Evaluation result with complexity score and detailed analysis
   * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
   * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
   * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
   */
  async evaluate(text, grade) {
    this.logger.info("Starting Purpose evaluation", {
      evaluator: _PurposeEvaluator.metadata.id,
      operation: "evaluate",
      grade,
      textLength: text.length
    });
    const startTime = Date.now();
    const stageDetails = [];
    try {
      this.validateText(text);
      const gradeNum = this.parseAndValidateGrade(grade);
      const fkScore = _PurposeEvaluator.computeFkScore(text);
      // Prompt placeholders are substituted as text, so every input is stringified.
      const inputs = {
        text,
        grade_level: String(gradeNum),
        fk_score: String(fkScore)
      };
      const response = await this.callLLM(inputs);
      const latencyMs = Date.now() - startTime;
      const tokenUsage = {
        input_tokens: response.usage.inputTokens,
        output_tokens: response.usage.outputTokens
      };
      stageDetails.push({
        stage: STEP.id,
        provider: this.provider.label,
        latency_ms: response.latencyMs,
        token_usage: tokenUsage
      });
      const result = {
        // Convert the schema's snake_case value to its display label.
        score: COMPLEXITY_SCORE_DISPLAY[response.data.complexity_score],
        reasoning: response.data.reasoning,
        metadata: {
          model: this.provider.label,
          processingTimeMs: latencyMs
        },
        _internal: response.data
      };
      // Telemetry is best-effort: a failure to send must not fail the evaluation.
      this.sendTelemetry({
        status: "success",
        latencyMs,
        textLength: text.length,
        grade: String(gradeNum),
        provider: this.provider.label,
        tokenUsage,
        metadata: { stage_details: stageDetails },
        inputText: text
      }).catch(() => void 0);
      this.logger.info("Purpose evaluation completed successfully", {
        evaluator: _PurposeEvaluator.metadata.id,
        operation: "evaluate",
        grade: gradeNum,
        score: result.score,
        processingTimeMs: latencyMs
      });
      return result;
    } catch (error) {
      const latencyMs = Date.now() - startTime;
      this.logger.error("Purpose evaluation failed", {
        evaluator: _PurposeEvaluator.metadata.id,
        operation: "evaluate",
        grade,
        error: error instanceof Error ? error : void 0,
        processingTimeMs: latencyMs
      });
      // Report token usage only for stages that completed before the failure.
      const tokenUsage = stageDetails.length > 0 ? {
        input_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.input_tokens ?? 0), 0),
        output_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.output_tokens ?? 0), 0)
      } : void 0;
      this.sendTelemetry({
        status: "error",
        latencyMs,
        textLength: text.length,
        grade: String(grade),
        provider: this.provider.label,
        tokenUsage,
        errorCode: error instanceof Error ? error.name : "UnknownError",
        metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
        inputText: text
      }).catch(() => void 0);
      // Validation failures are the caller's input problem; everything else is
      // classified through wrapProviderError.
      if (error instanceof ValidationError) throw error;
      throw wrapProviderError(error, "Purpose evaluation failed");
    }
  }
  /**
   * Parse a grade and check it against the supported range.
   *
   * @param grade - Grade to validate; surrounding whitespace is ignored
   * @returns The grade as an integer between GRADE_MIN and GRADE_MAX inclusive
   * @throws {ValidationError} If the grade is not an integer in the supported range
   */
  parseAndValidateGrade(grade) {
    // Coerce before trimming: a non-string grade (e.g. the number 5) would
    // otherwise raise a TypeError from `.trim()`, which evaluate() would
    // misreport as a provider failure instead of validating the value.
    const num = Number(String(grade).trim());
    if (!Number.isInteger(num) || num < GRADE_MIN || num > GRADE_MAX) {
      throw new ValidationError(
        `Invalid grade "${grade}". Purpose evaluator supports integer grades ${GRADE_MIN}\u2013${GRADE_MAX}.`
      );
    }
    return num;
  }
  /**
   * Send the rendered system and user prompts to the provider and request
   * output matching PurposeOutputSchema.
   *
   * @param inputs - Placeholder values for the prompt templates
   * @returns The provider response's data, usage and latencyMs fields
   */
  async callLLM(inputs) {
    const response = await this.provider.generateStructured({
      messages: [
        { role: "system", content: getSystemPrompt5(inputs) },
        { role: "user", content: getUserPrompt5(inputs) }
      ],
      schema: PurposeOutputSchema,
      temperature: _PurposeEvaluator.TEMPERATURE
    });
    return { data: response.data, usage: response.usage, latencyMs: response.latencyMs };
  }
};
2191
2572
 
2192
2573
  // src/batch/evaluator.ts
2193
2574
  var EVALUATOR_MAP = /* @__PURE__ */ new Map([
@@ -2195,19 +2576,21 @@ var EVALUATOR_MAP = /* @__PURE__ */ new Map([
2195
2576
  [SmkEvaluator.metadata.id, SmkEvaluator],
2196
2577
  [VocabularyEvaluator.metadata.id, VocabularyEvaluator],
2197
2578
  [SentenceStructureEvaluator.metadata.id, SentenceStructureEvaluator],
2198
- [ConventionalityEvaluator.metadata.id, ConventionalityEvaluator]
2579
+ [ConventionalityEvaluator.metadata.id, ConventionalityEvaluator],
2580
+ [PurposeEvaluator.metadata.id, PurposeEvaluator]
2199
2581
  ]);
2200
2582
  var EVALUATOR_GROUPS = [
2201
2583
  {
2202
2584
  id: "text-complexity",
2203
2585
  name: "Text Complexity Analysis",
2204
- description: "Evaluates vocabulary complexity, sentence structure, subject matter knowledge, conventionality, and grade-level appropriateness",
2586
+ description: "Evaluates all dimensions of the Qualitative Text Complexity rubric",
2205
2587
  evaluatorIds: [
2206
2588
  GradeLevelAppropriatenessEvaluator.metadata.id,
2207
2589
  SmkEvaluator.metadata.id,
2208
2590
  VocabularyEvaluator.metadata.id,
2209
2591
  SentenceStructureEvaluator.metadata.id,
2210
- ConventionalityEvaluator.metadata.id
2592
+ ConventionalityEvaluator.metadata.id,
2593
+ PurposeEvaluator.metadata.id
2211
2594
  ],
2212
2595
  requiresGoogleKey: true,
2213
2596
  requiresOpenAIKey: true,
@@ -3298,6 +3681,8 @@ var COMPLEXITY_SCORE_MAP = {
3298
3681
  "moderately complex": 2,
3299
3682
  "very complex": 3,
3300
3683
  "exceedingly complex": 4
3684
+ // 'more context needed' has no numeric equivalent — rows with this score appear as N/A
3685
+ // in individual results and are excluded from aggregate stats, same as failed evaluations.
3301
3686
  };
3302
3687
  function evaluatorDisplayName(id) {
3303
3688
  return id.split("-").map((w) => w.charAt(0).toUpperCase() + w.slice(1)).join(" ");
@@ -3352,7 +3737,8 @@ function groupResultsByRow(results) {
3352
3737
  return grouped;
3353
3738
  }
3354
3739
  function formatEvaluatorPrefix(evaluatorId) {
3355
- return evaluatorId.replace(/-/g, "_");
3740
+ const slug = evaluatorId.includes(".") ? evaluatorId.split(".").pop() : evaluatorId;
3741
+ return slug.replace(/-/g, "_");
3356
3742
  }
3357
3743
  function escapeCSV(field) {
3358
3744
  if (field.includes(",") || field.includes('"') || field.includes("\n")) {