@learning-commons/evaluators 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,7 @@ var ai = require('ai');
10
10
  var zod = require('zod');
11
11
  var nlp = require('compromise');
12
12
  var syllable = require('syllable');
13
+ var textReadability = require('text-readability');
13
14
  var sync = require('csv-parse/sync');
14
15
 
15
16
  var _documentCurrentScript = typeof document !== 'undefined' ? document.currentScript : null;
@@ -36,6 +37,7 @@ function _interopNamespace(e) {
36
37
  var pLimit__default = /*#__PURE__*/_interopDefault(pLimit);
37
38
  var fs__namespace = /*#__PURE__*/_interopNamespace(fs);
38
39
  var nlp__default = /*#__PURE__*/_interopDefault(nlp);
40
+ var textReadability__default = /*#__PURE__*/_interopDefault(textReadability);
39
41
 
40
42
  // src/batch/evaluator.ts
41
43
 
@@ -198,8 +200,9 @@ var TimeoutError = class extends APIError {
198
200
  function parseProviderError(error) {
199
201
  if (error instanceof Error) {
200
202
  const message = error.message;
203
+ const err = error;
201
204
  const statusMatch = message.match(/\b(4\d{2}|5\d{2})\b/);
202
- const statusCode = statusMatch ? parseInt(statusMatch[1]) : void 0;
205
+ const statusCode = err.statusCode ?? err.status ?? (statusMatch ? parseInt(statusMatch[1]) : void 0);
203
206
  return {
204
207
  message,
205
208
  statusCode,
@@ -212,6 +215,11 @@ function parseProviderError(error) {
212
215
  }
213
216
  function wrapProviderError(error, defaultMessage = "API request failed") {
214
217
  const { message, statusCode, code } = parseProviderError(error);
218
+ if (statusCode === 404 || statusCode === 400 && /\bmodel\b.*(not found|does not exist|invalid)/i.test(message)) {
219
+ return new ConfigurationError(
220
+ `Model not found or invalid: ${message}. Check the model ID passed to the provider.`
221
+ );
222
+ }
215
223
  if (statusCode === 401 || statusCode === 403) {
216
224
  return new AuthenticationError(
217
225
  message.includes("API key") ? message : "Invalid API key",
@@ -286,6 +294,111 @@ function createLogger(customLogger, level = 2 /* WARN */) {
286
294
  }
287
295
  return new ConsoleLogger(level);
288
296
  }
297
+ var VercelAIProvider = class {
298
+ constructor(config) {
299
+ this.config = config;
300
+ if (config.type === "custom") {
301
+ throw new Error(
302
+ "VercelAIProvider does not support custom type. Use config.customProvider directly."
303
+ );
304
+ }
305
+ if (!config.model || config.model.trim() === "") {
306
+ throw new Error(
307
+ `model is required for VercelAIProvider (type: "${config.type}"). No default is assumed.`
308
+ );
309
+ }
310
+ this.model = config.model;
311
+ this.label = `${config.type}:${config.model}`;
312
+ }
313
+ label;
314
+ model;
315
+ /**
316
+ * Generate structured output using Vercel AI SDK's generateText with output
317
+ */
318
+ async generateStructured(request) {
319
+ const model = await this.getModel();
320
+ const startTime = Date.now();
321
+ const { output, usage } = await ai.generateText({
322
+ model,
323
+ messages: request.messages,
324
+ output: ai.Output.object({ schema: request.schema }),
325
+ temperature: request.temperature ?? 0,
326
+ maxRetries: this.config.maxRetries ?? 0,
327
+ ...request.maxTokens !== void 0 ? { maxTokens: request.maxTokens } : {}
328
+ });
329
+ return {
330
+ data: output,
331
+ model: this.model,
332
+ usage: {
333
+ inputTokens: usage.inputTokens || 0,
334
+ outputTokens: usage.outputTokens || 0
335
+ },
336
+ latencyMs: Date.now() - startTime
337
+ };
338
+ }
339
+ /**
340
+ * Generate plain text using Vercel AI SDK's generateText
341
+ */
342
+ async generateText(messages, temperature) {
343
+ const model = await this.getModel();
344
+ const startTime = Date.now();
345
+ const { text, usage } = await ai.generateText({
346
+ model,
347
+ messages,
348
+ temperature: temperature ?? this.config.temperature ?? 0,
349
+ maxRetries: this.config.maxRetries ?? 0
350
+ });
351
+ return {
352
+ text,
353
+ usage: {
354
+ inputTokens: usage.inputTokens || 0,
355
+ outputTokens: usage.outputTokens || 0
356
+ },
357
+ latencyMs: Date.now() - startTime
358
+ };
359
+ }
360
+ /**
361
+ * Get the configured language model.
362
+ * Uses dynamic imports so consumers only need to install the provider packages they use.
363
+ */
364
+ async getModel() {
365
+ const apiKey = this.config.apiKey;
366
+ switch (this.config.type) {
367
+ case "openai": {
368
+ const { createOpenAI } = await import('@ai-sdk/openai').catch(() => {
369
+ throw new Error(
370
+ "To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai"
371
+ );
372
+ });
373
+ return createOpenAI(apiKey ? { apiKey } : {})(this.model);
374
+ }
375
+ case "anthropic": {
376
+ const { createAnthropic } = await import('@ai-sdk/anthropic').catch(() => {
377
+ throw new Error(
378
+ "To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic"
379
+ );
380
+ });
381
+ return createAnthropic(apiKey ? { apiKey } : {})(this.model);
382
+ }
383
+ case "google": {
384
+ const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch(() => {
385
+ throw new Error(
386
+ "To use the Google provider, install its adapter: npm install @ai-sdk/google"
387
+ );
388
+ });
389
+ return createGoogleGenerativeAI(apiKey ? { apiKey } : {})(this.model);
390
+ }
391
+ default:
392
+ throw new Error(`Unsupported provider type: ${this.config.type}`);
393
+ }
394
+ }
395
+ };
396
+ function createProvider(config) {
397
+ if (config.type === "custom" && config.customProvider) {
398
+ return config.customProvider;
399
+ }
400
+ return new VercelAIProvider(config);
401
+ }
289
402
 
290
403
  // src/evaluators/base.ts
291
404
  var VALIDATION_LIMITS = {
@@ -294,6 +407,12 @@ var VALIDATION_LIMITS = {
294
407
  /** Maximum text length in characters (100K chars ≈ 25K tokens) */
295
408
  MAX_TEXT_LENGTH: 1e5
296
409
  };
410
+ var Provider = /* @__PURE__ */ ((Provider2) => {
411
+ Provider2["OpenAI"] = "openai";
412
+ Provider2["Google"] = "google";
413
+ Provider2["Anthropic"] = "anthropic";
414
+ return Provider2;
415
+ })(Provider || {});
297
416
  var BaseEvaluator = class {
298
417
  telemetryClient;
299
418
  logger;
@@ -311,21 +430,35 @@ var BaseEvaluator = class {
311
430
  * name: 'My Evaluator',
312
431
  * description: 'Does something useful',
313
432
  * supportedGrades: ['3', '4', '5'],
314
- * requiresGoogleKey: true,
315
- * requiresOpenAIKey: false,
433
+ * defaultProviders: [Provider.Google],
316
434
  * };
317
435
  * }
318
436
  * ```
319
437
  */
320
438
  static metadata;
439
+ /**
440
+ * @throws {ConfigurationError} If the subclass has not defined static metadata
441
+ * @throws {ConfigurationError} If modelOverride has an invalid provider or empty model
442
+ * @throws {ConfigurationError} If a required API key is missing
443
+ */
321
444
  constructor(config) {
322
445
  this.logger = createLogger(config.logger, config.logLevel ?? 2 /* WARN */);
446
+ this.validateModelOverride(config);
323
447
  this.validateApiKeys(config);
324
448
  const telemetryConfig = this.normalizeTelemetryConfig(config.telemetry);
325
449
  this.config = {
326
450
  maxRetries: config.maxRetries ?? 2,
327
- telemetry: telemetryConfig
451
+ telemetry: telemetryConfig,
452
+ modelOverride: config.modelOverride,
453
+ googleApiKey: config.googleApiKey,
454
+ openaiApiKey: config.openaiApiKey,
455
+ anthropicApiKey: config.anthropicApiKey
328
456
  };
457
+ if (config.modelOverride) {
458
+ this.logger.warn(
459
+ `modelOverride is active: using ${config.modelOverride.provider}:${config.modelOverride.model} instead of the default model. Evaluation quality may differ from recommended defaults.`
460
+ );
461
+ }
329
462
  if (this.config.telemetry.enabled) {
330
463
  this.telemetryClient = new TelemetryClient({
331
464
  endpoint: "https://api.learningcommons.org/evaluators-telemetry/v1/events",
@@ -350,21 +483,62 @@ var BaseEvaluator = class {
350
483
  return meta;
351
484
  }
352
485
  /**
353
- * Validate that required API keys are provided based on metadata
354
- * @throws {ConfigurationError} If required API keys are missing
486
+ * Validate modelOverride shape: provider must be a known Provider value and
487
+ * model must be a non-empty string.
488
+ * @throws {ConfigurationError} If the override is malformed
355
489
  */
356
- validateApiKeys(config) {
357
- if (this.metadata.requiresGoogleKey && !config.googleApiKey) {
490
+ validateModelOverride(config) {
491
+ if (!config.modelOverride) return;
492
+ const validProviders = Object.values(Provider);
493
+ if (!validProviders.includes(config.modelOverride.provider)) {
358
494
  throw new ConfigurationError(
359
- `Google API key is required for ${this.metadata.name} evaluator. Pass googleApiKey in config.`
495
+ `Invalid provider "${config.modelOverride.provider}" in modelOverride. Valid providers are: ${validProviders.join(", ")}.`
360
496
  );
361
497
  }
362
- if (this.metadata.requiresOpenAIKey && !config.openaiApiKey) {
498
+ if (!config.modelOverride.model || config.modelOverride.model.trim() === "") {
363
499
  throw new ConfigurationError(
364
- `OpenAI API key is required for ${this.metadata.name} evaluator. Pass openaiApiKey in config.`
500
+ `modelOverride.model is required. Specify the model ID for provider "${config.modelOverride.provider}".`
365
501
  );
366
502
  }
367
503
  }
504
+ /**
505
+ * Validate that the required API key is present.
506
+ * When modelOverride is set, checks the override provider's key.
507
+ * Otherwise checks the keys required by the evaluator's default providers.
508
+ * @throws {ConfigurationError} If a required key is missing
509
+ */
510
+ validateApiKeys(config) {
511
+ const keyFor = {
512
+ ["openai" /* OpenAI */]: config.openaiApiKey?.trim() || void 0,
513
+ ["google" /* Google */]: config.googleApiKey?.trim() || void 0,
514
+ ["anthropic" /* Anthropic */]: config.anthropicApiKey?.trim() || void 0
515
+ };
516
+ const humanName = {
517
+ ["openai" /* OpenAI */]: "OpenAI API key",
518
+ ["google" /* Google */]: "Google API key",
519
+ ["anthropic" /* Anthropic */]: "Anthropic API key"
520
+ };
521
+ const configKey = {
522
+ ["openai" /* OpenAI */]: "openaiApiKey",
523
+ ["google" /* Google */]: "googleApiKey",
524
+ ["anthropic" /* Anthropic */]: "anthropicApiKey"
525
+ };
526
+ if (config.modelOverride) {
527
+ if (!keyFor[config.modelOverride.provider]) {
528
+ throw new ConfigurationError(
529
+ `${humanName[config.modelOverride.provider]} is required when using modelOverride with provider "${config.modelOverride.provider}". Pass ${configKey[config.modelOverride.provider]} in config.`
530
+ );
531
+ }
532
+ return;
533
+ }
534
+ for (const provider of this.metadata.defaultProviders) {
535
+ if (!keyFor[provider]) {
536
+ throw new ConfigurationError(
537
+ `${humanName[provider]} is required for ${this.metadata.name} evaluator. Pass ${configKey[provider]} in config.`
538
+ );
539
+ }
540
+ }
541
+ }
368
542
  /**
369
543
  * Normalize telemetry config to standard format
370
544
  */
@@ -445,6 +619,33 @@ var BaseEvaluator = class {
445
619
  );
446
620
  }
447
621
  }
622
+ /**
623
+ * Create an LLM provider, honouring modelOverride if set.
624
+ * When override is active, the key for the override provider is resolved
625
+ * from the matching top-level config field (e.g. anthropicApiKey for Anthropic).
626
+ */
627
+ createConfiguredProvider(defaultType, defaultModel, defaultApiKey) {
628
+ const override = this.config.modelOverride;
629
+ if (override) {
630
+ const apiKeyFor = {
631
+ ["openai" /* OpenAI */]: this.config.openaiApiKey,
632
+ ["google" /* Google */]: this.config.googleApiKey,
633
+ ["anthropic" /* Anthropic */]: this.config.anthropicApiKey
634
+ };
635
+ return createProvider({
636
+ type: override.provider,
637
+ model: override.model,
638
+ apiKey: apiKeyFor[override.provider],
639
+ maxRetries: this.config.maxRetries
640
+ });
641
+ }
642
+ return createProvider({
643
+ type: defaultType,
644
+ model: defaultModel,
645
+ apiKey: defaultApiKey,
646
+ maxRetries: this.config.maxRetries
647
+ });
648
+ }
448
649
  /**
449
650
  * Send telemetry event to analytics service
450
651
  * Common helper for all evaluators
@@ -465,123 +666,12 @@ var BaseEvaluator = class {
465
666
  provider: params.provider,
466
667
  token_usage: params.tokenUsage,
467
668
  metadata: params.metadata,
669
+ model_override: this.config.modelOverride ? true : void 0,
468
670
  // Include input text only if recording is enabled
469
671
  input_text: this.config.telemetry.recordInputs ? params.inputText : void 0
470
672
  });
471
673
  }
472
674
  };
473
- var DEFAULT_MODELS = {
474
- openai: "gpt-4o",
475
- anthropic: "claude-sonnet-4-5-20250929",
476
- google: "gemini-2.5-pro"
477
- };
478
- var VercelAIProvider = class {
479
- constructor(config) {
480
- this.config = config;
481
- if (config.type === "custom") {
482
- throw new Error(
483
- "VercelAIProvider does not support custom type. Use config.customProvider directly."
484
- );
485
- }
486
- }
487
- /**
488
- * Generate structured output using Vercel AI SDK's generateText with output
489
- */
490
- async generateStructured(request) {
491
- const model = await this.getModel(request.model);
492
- const startTime = Date.now();
493
- const { output, usage } = await ai.generateText({
494
- model,
495
- messages: request.messages,
496
- output: ai.Output.object({ schema: request.schema }),
497
- temperature: request.temperature ?? 0,
498
- maxRetries: this.config.maxRetries ?? 0,
499
- ...request.maxTokens !== void 0 ? { maxTokens: request.maxTokens } : {}
500
- });
501
- return {
502
- data: output,
503
- model: request.model || this.getDefaultModel(),
504
- usage: {
505
- inputTokens: usage.inputTokens || 0,
506
- outputTokens: usage.outputTokens || 0
507
- },
508
- latencyMs: Date.now() - startTime
509
- };
510
- }
511
- /**
512
- * Generate plain text using Vercel AI SDK's generateText
513
- */
514
- async generateText(messages, temperature) {
515
- const model = await this.getModel();
516
- const startTime = Date.now();
517
- const { text, usage } = await ai.generateText({
518
- model,
519
- messages,
520
- temperature: temperature ?? this.config.temperature ?? 0,
521
- maxRetries: this.config.maxRetries ?? 0
522
- });
523
- return {
524
- text,
525
- usage: {
526
- inputTokens: usage.inputTokens || 0,
527
- outputTokens: usage.outputTokens || 0
528
- },
529
- latencyMs: Date.now() - startTime
530
- };
531
- }
532
- /**
533
- * Get the configured language model.
534
- * Uses dynamic imports so consumers only need to install the provider packages they use.
535
- */
536
- async getModel(requestModel) {
537
- const modelId = requestModel || this.config.model || this.getDefaultModel();
538
- const apiKey = this.config.apiKey;
539
- switch (this.config.type) {
540
- case "openai": {
541
- const { createOpenAI } = await import('@ai-sdk/openai').catch(() => {
542
- throw new Error(
543
- "To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai"
544
- );
545
- });
546
- return createOpenAI(apiKey ? { apiKey } : {})(modelId);
547
- }
548
- case "anthropic": {
549
- const { createAnthropic } = await import('@ai-sdk/anthropic').catch(() => {
550
- throw new Error(
551
- "To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic"
552
- );
553
- });
554
- return createAnthropic(apiKey ? { apiKey } : {})(modelId);
555
- }
556
- case "google": {
557
- const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch(() => {
558
- throw new Error(
559
- "To use the Google provider, install its adapter: npm install @ai-sdk/google"
560
- );
561
- });
562
- return createGoogleGenerativeAI(apiKey ? { apiKey } : {})(modelId);
563
- }
564
- default:
565
- throw new Error(`Unsupported provider type: ${this.config.type}`);
566
- }
567
- }
568
- /**
569
- * Get default model for the configured provider
570
- */
571
- getDefaultModel() {
572
- const providerType = this.config.type;
573
- if (providerType === "custom") {
574
- throw new Error("Cannot get default model for custom provider type");
575
- }
576
- return DEFAULT_MODELS[providerType];
577
- }
578
- };
579
- function createProvider(config) {
580
- if (config.type === "custom" && config.customProvider) {
581
- return config.customProvider;
582
- }
583
- return new VercelAIProvider(config);
584
- }
585
675
  var TextComplexityLevel = zod.z.enum([
586
676
  "Slightly complex",
587
677
  "Moderately complex",
@@ -783,6 +873,44 @@ function featuresToJSON(features, decimals = 1, castToInt = true) {
783
873
  }
784
874
  return JSON.stringify(payload, null, 2);
785
875
  }
876
+ var LIBRARY_ADAPTERS = {
877
+ "text-readability": {
878
+ call(fnName, text) {
879
+ const fn = textReadability__default.default[fnName];
880
+ if (typeof fn !== "function") {
881
+ throw new Error(`Function "${fnName}" not found in text-readability.`);
882
+ }
883
+ return fn.call(textReadability__default.default, text);
884
+ }
885
+ }
886
+ };
887
+ var POST_TRANSFORMS = {
888
+ round(value, { precision = 0 }) {
889
+ const factor = 10 ** precision;
890
+ return Math.round(value * factor) / factor;
891
+ }
892
+ };
893
+ function runPreprocessingStep(text, impl) {
894
+ const adapter = LIBRARY_ADAPTERS[impl.library];
895
+ if (!adapter) {
896
+ const supported = Object.keys(LIBRARY_ADAPTERS).join(", ");
897
+ throw new Error(
898
+ `Unsupported preprocessing library "${impl.library}". Supported: ${supported}.`
899
+ );
900
+ }
901
+ let result = adapter.call(impl.function, text);
902
+ if (impl.post_transform) {
903
+ const transform = POST_TRANSFORMS[impl.post_transform.type];
904
+ if (!transform) {
905
+ const supported = Object.keys(POST_TRANSFORMS).join(", ");
906
+ throw new Error(
907
+ `Unsupported post_transform type "${impl.post_transform.type}". Supported: ${supported}.`
908
+ );
909
+ }
910
+ result = transform(result, impl.post_transform);
911
+ }
912
+ return result;
913
+ }
786
914
 
787
915
  // ../../evals/prompts/vocabulary/background-knowledge.txt
788
916
  var background_knowledge_default = `
@@ -1088,32 +1216,28 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1088
1216
  name: "Vocabulary",
1089
1217
  description: "Evaluates vocabulary complexity of educational texts relative to grade level",
1090
1218
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
1091
- requiresGoogleKey: true,
1092
- requiresOpenAIKey: true
1219
+ defaultProviders: ["google" /* Google */, "openai" /* OpenAI */]
1093
1220
  };
1094
1221
  grades34ComplexityProvider;
1095
1222
  otherGradesComplexityProvider;
1096
1223
  backgroundKnowledgeProvider;
1097
1224
  constructor(config) {
1098
1225
  super(config);
1099
- this.grades34ComplexityProvider = createProvider({
1100
- type: "google",
1101
- model: "gemini-2.5-pro",
1102
- apiKey: config.googleApiKey,
1103
- maxRetries: this.config.maxRetries
1104
- });
1105
- this.otherGradesComplexityProvider = createProvider({
1106
- type: "openai",
1107
- model: "gpt-4.1-2025-04-14",
1108
- apiKey: config.openaiApiKey,
1109
- maxRetries: this.config.maxRetries
1110
- });
1111
- this.backgroundKnowledgeProvider = createProvider({
1112
- type: "openai",
1113
- model: "gpt-4o-2024-11-20",
1114
- apiKey: config.openaiApiKey,
1115
- maxRetries: this.config.maxRetries
1116
- });
1226
+ this.grades34ComplexityProvider = this.createConfiguredProvider(
1227
+ "google" /* Google */,
1228
+ "gemini-2.5-pro",
1229
+ config.googleApiKey
1230
+ );
1231
+ this.otherGradesComplexityProvider = this.createConfiguredProvider(
1232
+ "openai" /* OpenAI */,
1233
+ "gpt-4.1-2025-04-14",
1234
+ config.openaiApiKey
1235
+ );
1236
+ this.backgroundKnowledgeProvider = this.createConfiguredProvider(
1237
+ "openai" /* OpenAI */,
1238
+ "gpt-4o-2024-11-20",
1239
+ config.openaiApiKey
1240
+ );
1117
1241
  }
1118
1242
  /**
1119
1243
  * Evaluate vocabulary complexity for a given text and grade level
@@ -1122,6 +1246,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1122
1246
  * @param grade - The target grade level (3-12)
1123
1247
  * @returns Evaluation result with complexity score and detailed analysis
1124
1248
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
1249
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1125
1250
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1126
1251
  */
1127
1252
  async evaluate(text, grade) {
@@ -1133,7 +1258,9 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1133
1258
  });
1134
1259
  const startTime = Date.now();
1135
1260
  const stageDetails = [];
1136
- const complexityProviderName = grade === "3" || grade === "4" ? "google:gemini-2.5-pro" : "openai:gpt-4.1-2025-04-14";
1261
+ const complexityProviderLabel = grade === "3" || grade === "4" ? this.grades34ComplexityProvider.label : this.otherGradesComplexityProvider.label;
1262
+ const backgroundProviderLabel = this.backgroundKnowledgeProvider.label;
1263
+ const modelLabel = this.config.modelOverride ? backgroundProviderLabel : `${backgroundProviderLabel}+${complexityProviderLabel}`;
1137
1264
  try {
1138
1265
  this.validateText(text);
1139
1266
  this.validateGrade(grade, new Set(_VocabularyEvaluator.metadata.supportedGrades));
@@ -1144,7 +1271,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1144
1271
  const bgResponse = await this.getBackgroundKnowledgeAssumption(text, grade);
1145
1272
  stageDetails.push({
1146
1273
  stage: "background_knowledge",
1147
- provider: "openai:gpt-4o-2024-11-20",
1274
+ provider: backgroundProviderLabel,
1148
1275
  latency_ms: bgResponse.latencyMs,
1149
1276
  token_usage: {
1150
1277
  input_tokens: bgResponse.usage.inputTokens,
@@ -1160,7 +1287,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1160
1287
  );
1161
1288
  stageDetails.push({
1162
1289
  stage: "complexity_evaluation",
1163
- provider: complexityProviderName,
1290
+ provider: complexityProviderLabel,
1164
1291
  latency_ms: complexityResponse.latencyMs,
1165
1292
  token_usage: {
1166
1293
  input_tokens: complexityResponse.usage.inputTokens,
@@ -1176,8 +1303,10 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1176
1303
  score: complexityResponse.data.complexity_score,
1177
1304
  reasoning: complexityResponse.data.reasoning,
1178
1305
  metadata: {
1179
- model: `openai:gpt-4o-2024-11-20 + ${complexityProviderName}`,
1180
- processingTimeMs: latencyMs
1306
+ model: modelLabel,
1307
+ processingTimeMs: latencyMs,
1308
+ inputTokens: totalTokenUsage.input_tokens,
1309
+ outputTokens: totalTokenUsage.output_tokens
1181
1310
  },
1182
1311
  _internal: complexityResponse.data
1183
1312
  };
@@ -1186,7 +1315,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1186
1315
  latencyMs,
1187
1316
  textLength: text.length,
1188
1317
  grade,
1189
- provider: `openai:gpt-4o-2024-11-20 + ${complexityProviderName}`,
1318
+ provider: modelLabel,
1190
1319
  tokenUsage: totalTokenUsage,
1191
1320
  metadata: {
1192
1321
  stage_details: stageDetails
@@ -1221,7 +1350,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1221
1350
  latencyMs,
1222
1351
  textLength: text.length,
1223
1352
  grade,
1224
- provider: `openai:gpt-4o-2024-11-20 + ${complexityProviderName}`,
1353
+ provider: modelLabel,
1225
1354
  tokenUsage: totalTokenUsage,
1226
1355
  errorCode: error instanceof Error ? error.name : "UnknownError",
1227
1356
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -1439,25 +1568,12 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1439
1568
  name: "Sentence Structure",
1440
1569
  description: "Evaluates sentence structure complexity based on grammatical features",
1441
1570
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
1442
- requiresGoogleKey: false,
1443
- requiresOpenAIKey: true
1571
+ defaultProviders: ["openai" /* OpenAI */]
1444
1572
  };
1445
- analysisProvider;
1446
- complexityProvider;
1573
+ provider;
1447
1574
  constructor(config) {
1448
1575
  super(config);
1449
- this.analysisProvider = createProvider({
1450
- type: "openai",
1451
- model: "gpt-4o",
1452
- apiKey: config.openaiApiKey,
1453
- maxRetries: this.config.maxRetries
1454
- });
1455
- this.complexityProvider = createProvider({
1456
- type: "openai",
1457
- model: "gpt-4o",
1458
- apiKey: config.openaiApiKey,
1459
- maxRetries: this.config.maxRetries
1460
- });
1576
+ this.provider = this.createConfiguredProvider("openai" /* OpenAI */, "gpt-4o", config.openaiApiKey);
1461
1577
  }
1462
1578
  /**
1463
1579
  * Evaluate sentence structure complexity for a given text and grade level
@@ -1466,6 +1582,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1466
1582
  * @param grade - The target grade level (3-12)
1467
1583
  * @returns Evaluation result with complexity score and detailed analysis
1468
1584
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
1585
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1469
1586
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1470
1587
  */
1471
1588
  async evaluate(text, grade) {
@@ -1487,7 +1604,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1487
1604
  const analysisResponse = await this.analyzeSentenceStructure(text);
1488
1605
  stageDetails.push({
1489
1606
  stage: "sentence_analysis",
1490
- provider: "openai:gpt-4o",
1607
+ provider: this.provider.label,
1491
1608
  latency_ms: analysisResponse.latencyMs,
1492
1609
  token_usage: {
1493
1610
  input_tokens: analysisResponse.usage.inputTokens,
@@ -1502,7 +1619,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1502
1619
  const complexityResponse = await this.classifyComplexity(features, grade, text);
1503
1620
  stageDetails.push({
1504
1621
  stage: "complexity_classification",
1505
- provider: "openai:gpt-4o",
1622
+ provider: this.provider.label,
1506
1623
  latency_ms: complexityResponse.latencyMs,
1507
1624
  token_usage: {
1508
1625
  input_tokens: complexityResponse.usage.inputTokens,
@@ -1518,8 +1635,10 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1518
1635
  score: complexityResponse.data.answer,
1519
1636
  reasoning: complexityResponse.data.reasoning,
1520
1637
  metadata: {
1521
- model: "openai:gpt-4o",
1522
- processingTimeMs: latencyMs
1638
+ model: this.provider.label,
1639
+ processingTimeMs: latencyMs,
1640
+ inputTokens: totalTokenUsage.input_tokens,
1641
+ outputTokens: totalTokenUsage.output_tokens
1523
1642
  },
1524
1643
  _internal: {
1525
1644
  sentenceAnalysis: analysisResponse.data,
@@ -1532,7 +1651,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1532
1651
  latencyMs,
1533
1652
  textLength: text.length,
1534
1653
  grade,
1535
- provider: "openai:gpt-4o",
1654
+ provider: this.provider.label,
1536
1655
  tokenUsage: totalTokenUsage,
1537
1656
  metadata: {
1538
1657
  stage_details: stageDetails
@@ -1567,7 +1686,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1567
1686
  latencyMs,
1568
1687
  textLength: text.length,
1569
1688
  grade,
1570
- provider: "openai:gpt-4o",
1689
+ provider: this.provider.label,
1571
1690
  tokenUsage: totalTokenUsage,
1572
1691
  errorCode: error instanceof Error ? error.name : "UnknownError",
1573
1692
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -1595,7 +1714,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1595
1714
  `flesch_kincaid_grade: ${metrics.fleschKincaidGrade}`
1596
1715
  ].join("\n");
1597
1716
  const userPrompt = getUserPromptAnalysis(text, gtCountsStr);
1598
- const response = await this.analysisProvider.generateStructured({
1717
+ const response = await this.provider.generateStructured({
1599
1718
  messages: [
1600
1719
  { role: "system", content: getSystemPromptAnalysis() },
1601
1720
  { role: "user", content: userPrompt }
@@ -1617,7 +1736,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1617
1736
  async classifyComplexity(features, grade, excerpt) {
1618
1737
  const featuresJSON = featuresToJSON(features, 1, true);
1619
1738
  const userPrompt = getUserPromptComplexity(featuresJSON, grade, excerpt);
1620
- const response = await this.complexityProvider.generateStructured({
1739
+ const response = await this.provider.generateStructured({
1621
1740
  messages: [
1622
1741
  { role: "system", content: getSystemPromptComplexity() },
1623
1742
  { role: "user", content: userPrompt }
@@ -1673,18 +1792,16 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1673
1792
  description: "Determines appropriate grade level for text with scaffolding recommendations",
1674
1793
  supportedGrades: [],
1675
1794
  // No grade parameter required - evaluates what grade the text is appropriate for
1676
- requiresGoogleKey: true,
1677
- requiresOpenAIKey: false
1795
+ defaultProviders: ["google" /* Google */]
1678
1796
  };
1679
1797
  provider;
1680
1798
  constructor(config) {
1681
1799
  super(config);
1682
- this.provider = createProvider({
1683
- type: "google",
1684
- model: "gemini-2.5-pro",
1685
- apiKey: config.googleApiKey,
1686
- maxRetries: this.config.maxRetries
1687
- });
1800
+ this.provider = this.createConfiguredProvider(
1801
+ "google" /* Google */,
1802
+ "gemini-2.5-pro",
1803
+ config.googleApiKey
1804
+ );
1688
1805
  }
1689
1806
  /**
1690
1807
  * Evaluate grade level appropriateness for a given text
@@ -1692,6 +1809,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1692
1809
  * @param text - The text to evaluate
1693
1810
  * @returns Evaluation result with grade recommendations and scaffolding suggestions
1694
1811
  * @throws {ValidationError} If text is empty or too short/long
1812
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1695
1813
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1696
1814
  */
1697
1815
  async evaluate(text) {
@@ -1725,8 +1843,10 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1725
1843
  score: response.data.grade,
1726
1844
  reasoning: response.data.reasoning,
1727
1845
  metadata: {
1728
- model: "google:gemini-2.5-pro",
1729
- processingTimeMs: latencyMs
1846
+ model: this.provider.label,
1847
+ processingTimeMs: latencyMs,
1848
+ inputTokens: tokenUsage.input_tokens,
1849
+ outputTokens: tokenUsage.output_tokens
1730
1850
  },
1731
1851
  _internal: response.data
1732
1852
  };
@@ -1734,7 +1854,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1734
1854
  status: "success",
1735
1855
  latencyMs,
1736
1856
  textLength: text.length,
1737
- provider: "google:gemini-2.5-pro",
1857
+ provider: this.provider.label,
1738
1858
  tokenUsage,
1739
1859
  // No metadata.stage_details for single-stage evaluator
1740
1860
  inputText: text
@@ -1759,7 +1879,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1759
1879
  status: "error",
1760
1880
  latencyMs,
1761
1881
  textLength: text.length,
1762
- provider: "google:gemini-2.5-pro",
1882
+ provider: this.provider.label,
1763
1883
  errorCode: error instanceof Error ? error.name : "UnknownError",
1764
1884
  inputText: text
1765
1885
  }).catch(() => {
@@ -1870,18 +1990,16 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1870
1990
  name: "Subject Matter Knowledge",
1871
1991
  description: "Evaluates background knowledge demands of educational texts relative to grade level",
1872
1992
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
1873
- requiresGoogleKey: true,
1874
- requiresOpenAIKey: false
1993
+ defaultProviders: ["google" /* Google */]
1875
1994
  };
1876
1995
  provider;
1877
1996
  constructor(config) {
1878
1997
  super(config);
1879
- this.provider = createProvider({
1880
- type: "google",
1881
- model: "gemini-3-flash-preview",
1882
- apiKey: config.googleApiKey,
1883
- maxRetries: this.config.maxRetries
1884
- });
1998
+ this.provider = this.createConfiguredProvider(
1999
+ "google" /* Google */,
2000
+ "gemini-3-flash-preview",
2001
+ config.googleApiKey
2002
+ );
1885
2003
  }
1886
2004
  /**
1887
2005
  * Evaluate subject matter knowledge complexity for a given text and grade level
@@ -1890,6 +2008,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1890
2008
  * @param grade - The target grade level (3-12)
1891
2009
  * @returns Evaluation result with complexity score and detailed analysis
1892
2010
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
2011
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1893
2012
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1894
2013
  */
1895
2014
  async evaluate(text, grade) {
@@ -1912,7 +2031,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1912
2031
  const response = await this.evaluateSmk(text, grade, fkScore);
1913
2032
  stageDetails.push({
1914
2033
  stage: "smk_evaluation",
1915
- provider: "google:gemini-3-flash-preview",
2034
+ provider: this.provider.label,
1916
2035
  latency_ms: response.latencyMs,
1917
2036
  token_usage: {
1918
2037
  input_tokens: response.usage.inputTokens,
@@ -1928,8 +2047,10 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1928
2047
  score: response.data.complexity_score,
1929
2048
  reasoning: response.data.reasoning,
1930
2049
  metadata: {
1931
- model: "google:gemini-3-flash-preview",
1932
- processingTimeMs: latencyMs
2050
+ model: this.provider.label,
2051
+ processingTimeMs: latencyMs,
2052
+ inputTokens: totalTokenUsage.input_tokens,
2053
+ outputTokens: totalTokenUsage.output_tokens
1933
2054
  },
1934
2055
  _internal: response.data
1935
2056
  };
@@ -1938,7 +2059,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1938
2059
  latencyMs,
1939
2060
  textLength: text.length,
1940
2061
  grade,
1941
- provider: "google:gemini-3-flash-preview",
2062
+ provider: this.provider.label,
1942
2063
  tokenUsage: totalTokenUsage,
1943
2064
  metadata: {
1944
2065
  stage_details: stageDetails
@@ -1973,7 +2094,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1973
2094
  latencyMs,
1974
2095
  textLength: text.length,
1975
2096
  grade,
1976
- provider: "google:gemini-3-flash-preview",
2097
+ provider: this.provider.label,
1977
2098
  tokenUsage: totalTokenUsage,
1978
2099
  errorCode: error instanceof Error ? error.name : "UnknownError",
1979
2100
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -2077,18 +2198,16 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2077
2198
  name: "Conventionality",
2078
2199
  description: "Evaluates how explicit, literal, and straightforward a text's meaning is relative to grade level",
2079
2200
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
2080
- requiresGoogleKey: true,
2081
- requiresOpenAIKey: false
2201
+ defaultProviders: ["google" /* Google */]
2082
2202
  };
2083
2203
  provider;
2084
2204
  constructor(config) {
2085
2205
  super(config);
2086
- this.provider = createProvider({
2087
- type: "google",
2088
- model: "gemini-3-flash-preview",
2089
- apiKey: config.googleApiKey,
2090
- maxRetries: this.config.maxRetries
2091
- });
2206
+ this.provider = this.createConfiguredProvider(
2207
+ "google" /* Google */,
2208
+ "gemini-3-flash-preview",
2209
+ config.googleApiKey
2210
+ );
2092
2211
  }
2093
2212
  /**
2094
2213
  * Evaluate conventionality complexity for a given text and grade level
@@ -2097,6 +2216,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2097
2216
  * @param grade - The target grade level (3-12)
2098
2217
  * @returns Evaluation result with complexity score and detailed analysis
2099
2218
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
2219
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
2100
2220
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
2101
2221
  */
2102
2222
  async evaluate(text, grade) {
@@ -2119,7 +2239,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2119
2239
  const response = await this.evaluateConventionality(text, grade, fkScore);
2120
2240
  stageDetails.push({
2121
2241
  stage: "conventionality_evaluation",
2122
- provider: "google:gemini-3-flash-preview",
2242
+ provider: this.provider.label,
2123
2243
  latency_ms: response.latencyMs,
2124
2244
  token_usage: {
2125
2245
  input_tokens: response.usage.inputTokens,
@@ -2135,8 +2255,10 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2135
2255
  score: response.data.complexity_score,
2136
2256
  reasoning: response.data.reasoning,
2137
2257
  metadata: {
2138
- model: "google:gemini-3-flash-preview",
2139
- processingTimeMs: latencyMs
2258
+ model: this.provider.label,
2259
+ processingTimeMs: latencyMs,
2260
+ inputTokens: totalTokenUsage.input_tokens,
2261
+ outputTokens: totalTokenUsage.output_tokens
2140
2262
  },
2141
2263
  _internal: response.data
2142
2264
  };
@@ -2145,7 +2267,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2145
2267
  latencyMs,
2146
2268
  textLength: text.length,
2147
2269
  grade,
2148
- provider: "google:gemini-3-flash-preview",
2270
+ provider: this.provider.label,
2149
2271
  tokenUsage: totalTokenUsage,
2150
2272
  metadata: {
2151
2273
  stage_details: stageDetails
@@ -2180,7 +2302,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2180
2302
  latencyMs,
2181
2303
  textLength: text.length,
2182
2304
  grade,
2183
- provider: "google:gemini-3-flash-preview",
2305
+ provider: this.provider.label,
2184
2306
  tokenUsage: totalTokenUsage,
2185
2307
  errorCode: error instanceof Error ? error.name : "UnknownError",
2186
2308
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -2212,6 +2334,278 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2212
2334
  };
2213
2335
  }
2214
2336
  };
2337
// Structured-output schema for the Purpose evaluator's LLM response.
// Every object level is strict; the descriptions are attached to the schema nodes.
var PurposeOutputSchema = (() => {
  const { z } = zod;
  // A described string field.
  const described = (description) => z.string().describe(description);
  // A described array of strict objects with the given shape.
  const listOf = (shape, description) => z.array(z.object(shape).strict()).describe(description);

  return z.object({
    complexity_score: z.enum([
      "slightly_complex",
      "moderately_complex",
      "very_complex",
      "exceedingly_complex",
      "more_context_needed"
    ]).describe("The Purpose complexity level for the target grade."),
    reasoning: described("A high-level summary of why the text is at this complexity level for the target grade."),
    details: z.object({
      detailed_summary: listOf(
        {
          factor: described("The specific text complexity factor identified."),
          description: described("How this factor manifests in the text."),
          effect_on_complexity_dimension: described("How this factor affects the reader's ability to understand the text's specific complexity dimension.")
        },
        "Individual complexity factors with descriptions and their effects."
      ),
      adjustment_and_scaffolding: listOf(
        {
          scaffolding_need: described("The complexity factor that requires scaffolding."),
          suggestion: described("A specific instructional strategy to support students with this factor.")
        },
        "Scaffolding strategies to make the text accessible at the target grade."
      ),
      recommended_use_cases: listOf(
        {
          opportunity: described("An instructional opportunity related to the text."),
          suggestion: described("A specific way to leverage this text for that instructional purpose.")
        },
        "Additional instructional opportunities for using this text."
      )
    }).strict().describe("Practical instructional details including scaffolding strategies and recommended use cases.")
  }).strict();
})();
2338
+
2339
+ // ../../evals/prompts/purpose/system.txt
2340
+ var system_default4 = '\n Role\n You are an expert reading assessment evaluator. Your task is to determine the Text Complexity of a given passage based exclusively on the Purpose dimension of the qualitative measures rubric.\n\n Task Details\n You will be provided with an informational or literary `text`, along with its `grade_level` and `fk_score` (Flesch-Kincaid). You must analyze the text and determine how difficult it is for a reader to identify the author\'s purpose. \n\n Crucially, you must distinguish between the text\'s *topic* (what it is about) and its *purpose* (why the author wrote it). \n\n Rubric: Purpose Complexity\n Exceedingly Complex: Subtle and intricate, difficult to determine; includes many theoretical or abstract elements.\n Very Complex: Implicit or subtle but fairly easy to infer; more theoretical or abstract than concrete.\n Moderately Complex: Implied but easy to identify based upon context or source.\n Slightly Complex: Explicitly stated, clear, concrete, narrowly focused.\n More Context Needed: The text is a fragment or lacks necessary introductory context, making the true purpose impossible to determine accurately without external background knowledge.\n\n Expert Rules for Evaluating Purpose\n Based on expert consensus and historical grading corrections, you must apply the following heuristics:\n\n 1. The "Slightly Complex" Benchmark (Straightforward and Explicit)\n A text is Slightly Complex if its purpose is explicitly stated or if its informative intent is straightforward, clear, concrete, and directly answers what the text is immediately about. If the text opens by clearly identifying a concrete topic (e.g., "Pins are made of either brass or iron wire") and rigidly follows through by explaining factual, practical information or a process (like manufacturing steps or geographic facts), the purpose is considered explicit and straightforward. It does *not* require a literal statement like "The purpose of this text is to..." 
as long as the delivery of information is direct, clear, and unadorned by persuasive elements or complex framing.\n\n 2. Moderately Complex via Guiding Questions & Inquiry Formats\n If a text begins with a general introduction and uses guiding questions (e.g., "Have you ever wondered how clouds are formed?") to transition into an explanation, the purpose is implied rather than explicitly stated upfront. Because the reader must recognize the question as the pivot point for the author\'s intent, it is Moderately Complex.\n\n 3. Moderately Complex via Multiple Distinct Informational Goals\n If a text covers a broad topic but jumps between several distinct scientific or informational objectives without an overarching framing device or explicit thesis (e.g., talking about measuring ice sheets, then mapping, then finding meteorites), the reader must synthesize these diverse facts to recognize the broader purpose, making it Moderately Complex.\n\n 4. Moderately Complex via Arguments Disguised as Information\n If an author is arguing a specific point, correcting a misconception, or defending a stance, but the text could initially be mistaken by students as purely informative factual text, it is Moderately Complex. The reader must infer the persuasive intent or argumentative purpose beneath the informative tone.\n\n 5. "More Context Needed" for Fragments\n If a text is a fragment missing a crucial introduction or context, and identifying the author\'s purpose beyond a simple surface-level description would be exceptionally difficult for a reader in the target grade level without that external background, score it as `more_context_needed`. 
\n\n Output Format\n Provide your evaluation in the following structure:\n reasoning:\n - Surface Analysis: Identify if the text clearly identifies its topic and delivers straightforward facts, or if it utilizes structural cues, titles, or direct thesis statements.\n - Subtlety & Framing: Is the informative purpose straightforward and concrete? Does it use guiding questions? Is it an argument disguised as pure information? Are there multiple distinct informational goals requiring synthesis?\n - Context Check: Is this text a fragment missing crucial context that obscures the deeper purpose for the target grade level?\n - Rubric Alignment: Explain how the text aligns with the specific language of the rubric, explicitly referencing the expert rules above. Justify why it isn\'t one level simpler or more complex.\n\n answer:\n - complexity_score: (slightly_complex, moderately_complex, very_complex, exceedingly_complex, more_context_needed)\n - reasoning: A brief summary of your final decision.\n - details: Structured breakdown of PurposeDetails including detailed_summary, adjustment_and_scaffolding, and recommended_use_cases.\n';
2341
+
2342
+ // ../../evals/prompts/purpose/user.txt
2343
+ var user_default4 = "Analyze:\nText: {text}\nGrade: {grade_level}\nFK Score: {fk_score}";
2344
+
2345
+ // ../../evals/prompts/purpose/config.json
2346
+ var config_default = {
2347
+ evaluator: {
2348
+ id: "literacy.gla.purpose",
2349
+ name: "Purpose Dimension Text Complexity Evaluator",
2350
+ description: "Evaluates the Purpose dimension of qualitative text complexity for K-12 reading assessment, producing a 5-level rubric rating with structured pedagogical detail."
2351
+ },
2352
+ preprocessing: [
2353
+ {
2354
+ id: "fk_score",
2355
+ kind: "flesch_kincaid_grade",
2356
+ description: "Compute the Flesch-Kincaid Grade Level for the input text and bind it to {fk_score} in the prompt.",
2357
+ input: "text",
2358
+ output: "fk_score",
2359
+ implementation: {
2360
+ python: {
2361
+ library: "textstat",
2362
+ function: "flesch_kincaid_grade",
2363
+ post_transform: {
2364
+ type: "round",
2365
+ precision: 2
2366
+ }
2367
+ },
2368
+ typescript: {
2369
+ library: "text-readability",
2370
+ function: "fleschKincaidGrade",
2371
+ post_transform: {
2372
+ type: "round",
2373
+ precision: 2
2374
+ }
2375
+ }
2376
+ }
2377
+ }
2378
+ ],
2379
+ steps: [
2380
+ {
2381
+ id: "evaluate_purpose",
2382
+ description: "Single-call LLM step that produces the EvaluatorOutput JSON.",
2383
+ prompt: {
2384
+ type: "chat",
2385
+ messages: [
2386
+ {
2387
+ role: "system",
2388
+ source_path: "system.txt",
2389
+ sha256: "745b95b7d54dc845b99363c9d3360355381883c22a5f6a0f305d7349cae38a54"
2390
+ },
2391
+ {
2392
+ role: "user",
2393
+ source_path: "user.txt",
2394
+ sha256: "cd8e6347db1a55d104e34436f8f66e833bd6583645d4786a554aaefdd26479b2"
2395
+ }
2396
+ ],
2397
+ placeholders: {
2398
+ text: {
2399
+ required: true,
2400
+ source: "input"
2401
+ },
2402
+ grade_level: {
2403
+ required: true,
2404
+ source: "input"
2405
+ },
2406
+ fk_score: {
2407
+ required: true,
2408
+ source: "preprocessing.fk_score"
2409
+ }
2410
+ }
2411
+ },
2412
+ model: {
2413
+ provider: "google",
2414
+ name: "gemini-3-flash-preview"
2415
+ },
2416
+ generation: {
2417
+ temperature: 0
2418
+ },
2419
+ parser: {
2420
+ kind: "structured_output"
2421
+ },
2422
+ output_binding: "formatted_output"
2423
+ }
2424
+ ]};
2425
+
2426
+ // src/prompts/purpose/index.ts
2427
// The step id is "evaluate_" followed by the last dot-separated segment of the evaluator id.
var STEP_ID = `evaluate_${config_default.evaluator.id.slice(config_default.evaluator.id.lastIndexOf(".") + 1)}`;
var _step = config_default.steps.find(({ id }) => id === STEP_ID);
if (!_step) {
  // Fail at module load time when the config does not declare the expected step.
  throw new Error(`Step "${STEP_ID}" not found in purpose config.json`);
}
// Names of the placeholders declared for that step's prompt.
var PLACEHOLDER_KEYS = Object.keys(_step.prompt.placeholders);
2431
/**
 * Substitute `{key}` tokens in a prompt template with the supplied input values.
 *
 * Only keys listed in PLACEHOLDER_KEYS that are also present in `inputs` are
 * substituted; every other `{...}` token is left as-is.
 *
 * All tokens are replaced in a single pass with a function replacer, so that:
 *  - `$`-sequences in a value (`$$`, `$&`, `$'`, ...) are inserted literally
 *    instead of being interpreted as replacement patterns, and
 *  - placeholder-looking text inside an inserted value (e.g. a passage that
 *    contains "{grade_level}") is never expanded by a later substitution.
 *
 * @param {string} template - Prompt template containing `{key}` tokens.
 * @param {Object} inputs - Placeholder values keyed by placeholder name.
 * @returns {string} The template with known placeholders filled in.
 */
function applyPlaceholders(template, inputs) {
  const fillable = PLACEHOLDER_KEYS.filter((key) => key in inputs);
  if (fillable.length === 0) return template;
  // Keys come from config; escape them so they are matched literally.
  const escapeForRegExp = (key) => key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const tokenPattern = new RegExp(`\\{(${fillable.map(escapeForRegExp).join("|")})\\}`, "g");
  return template.replace(tokenPattern, (_token, key) => String(inputs[key]));
}
2437
/**
 * Render the Purpose system prompt.
 *
 * @param {Object} inputs - Placeholder values, passed through to applyPlaceholders.
 * @returns {string} The system prompt template after placeholder substitution.
 */
function getSystemPrompt5(inputs) {
  const systemPrompt = applyPlaceholders(system_default4, inputs);
  return systemPrompt;
}
2440
/**
 * Render the Purpose user prompt.
 *
 * @param {Object} inputs - Placeholder values, passed through to applyPlaceholders.
 * @returns {string} The user prompt template after placeholder substitution.
 */
function getUserPrompt5(inputs) {
  const userPrompt = applyPlaceholders(user_default4, inputs);
  return userPrompt;
}
2443
+
2444
+ // ../../evals/prompts/purpose/input_schema.json
2445
+ var input_schema_default = {
2446
+ properties: {
2447
+ grade_level: {
2448
+ minimum: 3,
2449
+ maximum: 12}
2450
+ }
2451
+ };
2452
+
2453
+ // src/evaluators/purpose.ts
2454
// The step id is "evaluate_" followed by the last dot-separated segment of the evaluator id.
var STEP_ID2 = `evaluate_${config_default.evaluator.id.slice(config_default.evaluator.id.lastIndexOf(".") + 1)}`;
var _step2 = config_default.steps.find(({ id }) => id === STEP_ID2);
if (!_step2) {
  // Fail at module load time when the config does not declare the expected step.
  throw new Error(`Step "${STEP_ID2}" not found in purpose config.json`);
}
var STEP = _step2;
// Supported grade range, read from the grade_level bounds of the input schema.
var { minimum: GRADE_MIN, maximum: GRADE_MAX } = input_schema_default.properties.grade_level;
// Every grade from GRADE_MIN to GRADE_MAX inclusive, as strings.
var SUPPORTED_GRADES = [];
for (let offset = 0; offset < GRADE_MAX - GRADE_MIN + 1; offset += 1) {
  SUPPORTED_GRADES.push(String(GRADE_MIN + offset));
}
// Maps the schema's complexity_score enum values to display labels.
var COMPLEXITY_SCORE_DISPLAY = {
  slightly_complex: "Slightly complex",
  moderately_complex: "Moderately complex",
  very_complex: "Very complex",
  exceedingly_complex: "Exceedingly complex",
  more_context_needed: "More context needed"
};
2468
/**
 * Evaluator for the Purpose dimension of qualitative text complexity.
 *
 * Single-stage evaluator: it computes an FK score for the text, renders the
 * purpose prompts, and makes one structured-output LLM call through the
 * configured Google provider.
 */
var PurposeEvaluator = class _PurposeEvaluator extends BaseEvaluator {
  /** Evaluator metadata, taken from the evaluator section of the purpose config. */
  static metadata = {
    id: config_default.evaluator.id,
    name: config_default.evaluator.name,
    description: config_default.evaluator.description,
    supportedGrades: SUPPORTED_GRADES,
    defaultProviders: ["google" /* Google */]
  };
  /** Generation temperature declared on the configured LLM step. */
  static TEMPERATURE = STEP.generation.temperature;
  /**
   * Compute the value bound to the `fk_score` prompt placeholder.
   *
   * Looks up the "fk_score" preprocessing step in the purpose config and hands
   * its `typescript` implementation descriptor to runPreprocessingStep.
   *
   * @param text - The text to score
   * @returns Whatever runPreprocessingStep returns for that descriptor
   *   (presumably the rounded Flesch-Kincaid grade — confirm against the helper)
   * @throws {Error} If the config has no "fk_score" preprocessing step
   */
  static computeFkScore(text) {
    const fkStep = config_default.preprocessing.find((p) => p.id === "fk_score");
    if (!fkStep) throw new Error("fk_score preprocessing step not found in purpose config.json");
    return runPreprocessingStep(text, fkStep.implementation.typescript);
  }
  provider;
  /**
   * @param config - Evaluator configuration; `config.googleApiKey` is passed to the provider factory
   */
  constructor(config) {
    super(config);
    this.provider = this.createConfiguredProvider(
      "google" /* Google */,
      STEP.model.name,
      config.googleApiKey
    );
  }
  /**
   * Evaluate purpose complexity for a given text and grade level
   *
   * @param text - The text to evaluate
   * @param grade - The target grade level (3-12)
   * @returns Evaluation result with complexity score and detailed analysis
   * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
   * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
   * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
   */
  async evaluate(text, grade) {
    this.logger.info("Starting Purpose evaluation", {
      evaluator: _PurposeEvaluator.metadata.id,
      operation: "evaluate",
      grade,
      textLength: text.length
    });
    const startTime = Date.now();
    const stageDetails = [];
    try {
      this.validateText(text);
      const gradeNum = this.parseAndValidateGrade(grade);
      const fkScore = _PurposeEvaluator.computeFkScore(text);
      // All placeholder values are passed to the prompt templates as strings.
      const inputs = {
        text,
        grade_level: String(gradeNum),
        fk_score: String(fkScore)
      };
      const response = await this.callLLM(inputs);
      const latencyMs = Date.now() - startTime;
      const tokenUsage = {
        input_tokens: response.usage.inputTokens,
        output_tokens: response.usage.outputTokens
      };
      stageDetails.push({
        stage: STEP.id,
        provider: this.provider.label,
        latency_ms: response.latencyMs,
        token_usage: tokenUsage
      });
      const result = {
        // Translate the schema enum value into its display label.
        score: COMPLEXITY_SCORE_DISPLAY[response.data.complexity_score],
        reasoning: response.data.reasoning,
        metadata: {
          model: this.provider.label,
          processingTimeMs: latencyMs,
          inputTokens: tokenUsage.input_tokens,
          outputTokens: tokenUsage.output_tokens
        },
        _internal: response.data
      };
      // Telemetry is best-effort: it is not awaited and a rejection is ignored.
      this.sendTelemetry({
        status: "success",
        latencyMs,
        textLength: text.length,
        grade: String(gradeNum),
        provider: this.provider.label,
        tokenUsage,
        metadata: { stage_details: stageDetails },
        inputText: text
      }).catch(() => void 0);
      this.logger.info("Purpose evaluation completed successfully", {
        evaluator: _PurposeEvaluator.metadata.id,
        operation: "evaluate",
        grade: gradeNum,
        score: result.score,
        processingTimeMs: latencyMs
      });
      return result;
    } catch (error) {
      const latencyMs = Date.now() - startTime;
      this.logger.error("Purpose evaluation failed", {
        evaluator: _PurposeEvaluator.metadata.id,
        operation: "evaluate",
        grade,
        error: error instanceof Error ? error : void 0,
        processingTimeMs: latencyMs
      });
      // Report token usage only for stages that completed before the failure.
      const tokenUsage = stageDetails.length > 0 ? {
        input_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.input_tokens ?? 0), 0),
        output_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.output_tokens ?? 0), 0)
      } : void 0;
      this.sendTelemetry({
        status: "error",
        latencyMs,
        textLength: text.length,
        grade: String(grade),
        provider: this.provider.label,
        tokenUsage,
        errorCode: error instanceof Error ? error.name : "UnknownError",
        metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
        inputText: text
      }).catch(() => void 0);
      // Validation failures surface unchanged; anything else is wrapped by wrapProviderError.
      if (error instanceof ValidationError) throw error;
      throw wrapProviderError(error, "Purpose evaluation failed");
    }
  }
  /**
   * Parse a grade and check it against the supported range.
   *
   * The grade is coerced with String() before trimming, so numeric grades
   * (e.g. 5) are accepted and null/undefined/blank values are rejected with a
   * ValidationError rather than raising a TypeError from `.trim()`.
   *
   * @param grade - The target grade, as a string or number
   * @returns The grade as an integer between GRADE_MIN and GRADE_MAX inclusive
   * @throws {ValidationError} If the grade is blank, not an integer, or out of range
   */
  parseAndValidateGrade(grade) {
    const raw = String(grade ?? "").trim();
    // Number("") is 0, so blank input is rejected explicitly.
    const num = raw === "" ? Number.NaN : Number(raw);
    if (!Number.isInteger(num) || num < GRADE_MIN || num > GRADE_MAX) {
      throw new ValidationError(
        `Invalid grade "${grade}". Purpose evaluator supports integer grades ${GRADE_MIN}\u2013${GRADE_MAX}.`
      );
    }
    return num;
  }
  /**
   * Make the single structured-output LLM call for this evaluator.
   *
   * @param inputs - Placeholder values used to render the system and user prompts
   * @returns The provider response reduced to `data`, `usage` and `latencyMs`
   */
  async callLLM(inputs) {
    const response = await this.provider.generateStructured({
      messages: [
        { role: "system", content: getSystemPrompt5(inputs) },
        { role: "user", content: getUserPrompt5(inputs) }
      ],
      schema: PurposeOutputSchema,
      temperature: _PurposeEvaluator.TEMPERATURE
    });
    return { data: response.data, usage: response.usage, latencyMs: response.latencyMs };
  }
};
2215
2609
 
2216
2610
  // src/batch/evaluator.ts
2217
2611
  var EVALUATOR_MAP = /* @__PURE__ */ new Map([
@@ -2219,19 +2613,21 @@ var EVALUATOR_MAP = /* @__PURE__ */ new Map([
2219
2613
  [SmkEvaluator.metadata.id, SmkEvaluator],
2220
2614
  [VocabularyEvaluator.metadata.id, VocabularyEvaluator],
2221
2615
  [SentenceStructureEvaluator.metadata.id, SentenceStructureEvaluator],
2222
- [ConventionalityEvaluator.metadata.id, ConventionalityEvaluator]
2616
+ [ConventionalityEvaluator.metadata.id, ConventionalityEvaluator],
2617
+ [PurposeEvaluator.metadata.id, PurposeEvaluator]
2223
2618
  ]);
2224
2619
  var EVALUATOR_GROUPS = [
2225
2620
  {
2226
2621
  id: "text-complexity",
2227
2622
  name: "Text Complexity Analysis",
2228
- description: "Evaluates vocabulary complexity, sentence structure, subject matter knowledge, conventionality, and grade-level appropriateness",
2623
+ description: "Evaluates all dimensions of the Qualitative Text Complexity rubric",
2229
2624
  evaluatorIds: [
2230
2625
  GradeLevelAppropriatenessEvaluator.metadata.id,
2231
2626
  SmkEvaluator.metadata.id,
2232
2627
  VocabularyEvaluator.metadata.id,
2233
2628
  SentenceStructureEvaluator.metadata.id,
2234
- ConventionalityEvaluator.metadata.id
2629
+ ConventionalityEvaluator.metadata.id,
2630
+ PurposeEvaluator.metadata.id
2235
2631
  ],
2236
2632
  requiresGoogleKey: true,
2237
2633
  requiresOpenAIKey: true,
@@ -2252,6 +2648,7 @@ var BatchEvaluator = class {
2252
2648
  concurrency: 3,
2253
2649
  maxRetries: 2,
2254
2650
  telemetry: false,
2651
+ bypassRowLimit: false,
2255
2652
  ...config
2256
2653
  };
2257
2654
  this.limit = pLimit__default.default(this.config.concurrency);
@@ -2405,9 +2802,9 @@ var BatchEvaluator = class {
2405
2802
  `Unknown evaluator group: "${groupId}". Available: ${EVALUATOR_GROUPS.map((g) => g.id).join(", ")}`
2406
2803
  );
2407
2804
  }
2408
- if (inputs.length > group.maxInputRows) {
2805
+ if (!this.config.bypassRowLimit && inputs.length > group.maxInputRows) {
2409
2806
  throw new Error(
2410
- `Input exceeds limit for "${group.id}": ${inputs.length} rows (max ${group.maxInputRows}). Split into smaller batches.`
2807
+ `Input exceeds limit for "${group.id}": ${inputs.length} rows (max ${group.maxInputRows}). Split into smaller batches, or pass { bypassRowLimit: true } in BatchConfig to bypass (use --bypass-row-limit on the CLI).`
2411
2808
  );
2412
2809
  }
2413
2810
  this.isCancelled = false;
@@ -3322,6 +3719,8 @@ var COMPLEXITY_SCORE_MAP = {
3322
3719
  "moderately complex": 2,
3323
3720
  "very complex": 3,
3324
3721
  "exceedingly complex": 4
3722
+ // 'more context needed' has no numeric equivalent — rows with this score appear as N/A
3723
+ // in individual results and are excluded from aggregate stats, same as failed evaluations.
3325
3724
  };
3326
3725
  function evaluatorDisplayName(id) {
3327
3726
  return id.split("-").map((w) => w.charAt(0).toUpperCase() + w.slice(1)).join(" ");
@@ -3376,7 +3775,8 @@ function groupResultsByRow(results) {
3376
3775
  return grouped;
3377
3776
  }
3378
3777
  function formatEvaluatorPrefix(evaluatorId) {
3379
- return evaluatorId.replace(/-/g, "_");
3778
+ const slug = evaluatorId.includes(".") ? evaluatorId.split(".").pop() : evaluatorId;
3779
+ return slug.replace(/-/g, "_");
3380
3780
  }
3381
3781
  function escapeCSV(field) {
3382
3782
  if (field.includes(",") || field.includes('"') || field.includes("\n")) {