@learning-commons/evaluators 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,12 +1,13 @@
1
1
  import { z } from 'zod';
2
+ import { generateText, Output } from 'ai';
2
3
  import { randomUUID } from 'crypto';
3
4
  import { readFileSync, mkdirSync, writeFileSync } from 'fs';
4
5
  import { dirname, join } from 'path';
5
6
  import { homedir } from 'os';
6
7
  import { fileURLToPath } from 'url';
7
- import { generateText, Output } from 'ai';
8
8
  import nlp from 'compromise';
9
9
  import { syllable } from 'syllable';
10
+ import textReadability from 'text-readability';
10
11
  import pLimit from 'p-limit';
11
12
 
12
13
  // src/schemas/outputs.ts
@@ -25,6 +26,7 @@ var GradeLevelAppropriatenessSchema = z.object({
25
26
  alternative_grade: GradeBand.describe("An alternative grade level for the text"),
26
27
  scaffolding_needed: z.string().describe("Scaffolding needed for the text to be appropriate for the alternative grade")
27
28
  });
/**
 * Zod schema for the Purpose complexity output: an overall complexity level,
 * a short reasoning string, and a `details` object holding three lists
 * (identified factors, scaffolding suggestions, recommended use cases).
 * Every object level is `.strict()`, so unknown keys are rejected.
 */
var PurposeOutputSchema = z.object({
  complexity_score: z.enum([
    "slightly_complex",
    "moderately_complex",
    "very_complex",
    "exceedingly_complex",
    "more_context_needed"
  ]).describe("The Purpose complexity level for the target grade."),
  reasoning: z.string().describe("A high-level summary of why the text is at this complexity level for the target grade."),
  details: z.object({
    detailed_summary: z.array(
      z.object({
        factor: z.string().describe("The specific text complexity factor identified."),
        description: z.string().describe("How this factor manifests in the text."),
        effect_on_complexity_dimension: z.string().describe("How this factor affects the reader's ability to understand the text's specific complexity dimension.")
      }).strict()
    ).describe("Individual complexity factors with descriptions and their effects."),
    adjustment_and_scaffolding: z.array(
      z.object({
        scaffolding_need: z.string().describe("The complexity factor that requires scaffolding."),
        suggestion: z.string().describe("A specific instructional strategy to support students with this factor.")
      }).strict()
    ).describe("Scaffolding strategies to make the text accessible at the target grade."),
    recommended_use_cases: z.array(
      z.object({
        opportunity: z.string().describe("An instructional opportunity related to the text."),
        suggestion: z.string().describe("A specific way to leverage this text for that instructional purpose.")
      }).strict()
    ).describe("Additional instructional opportunities for using this text.")
  }).strict().describe("Practical instructional details including scaffolding strategies and recommended use cases.")
}).strict();
28
30
 
29
31
  // src/errors.ts
30
32
  var EvaluatorError = class extends Error {
@@ -85,8 +87,9 @@ var TimeoutError = class extends APIError {
85
87
  function parseProviderError(error) {
86
88
  if (error instanceof Error) {
87
89
  const message = error.message;
90
+ const err = error;
88
91
  const statusMatch = message.match(/\b(4\d{2}|5\d{2})\b/);
89
- const statusCode = statusMatch ? parseInt(statusMatch[1]) : void 0;
92
+ const statusCode = err.statusCode ?? err.status ?? (statusMatch ? parseInt(statusMatch[1]) : void 0);
90
93
  return {
91
94
  message,
92
95
  statusCode,
@@ -99,6 +102,11 @@ function parseProviderError(error) {
99
102
  }
100
103
  function wrapProviderError(error, defaultMessage = "API request failed") {
101
104
  const { message, statusCode, code } = parseProviderError(error);
105
+ if (statusCode === 404 || statusCode === 400 && /\bmodel\b.*(not found|does not exist|invalid)/i.test(message)) {
106
+ return new ConfigurationError(
107
+ `Model not found or invalid: ${message}. Check the model ID passed to the provider.`
108
+ );
109
+ }
102
110
  if (statusCode === 401 || statusCode === 403) {
103
111
  return new AuthenticationError(
104
112
  message.includes("API key") ? message : "Invalid API key",
@@ -181,6 +189,119 @@ function createLogger(customLogger, level = 2 /* WARN */) {
181
189
  }
182
190
  return new ConsoleLogger(level);
183
191
  }
192
+
193
+ // src/providers/base.ts
/**
 * Provider type identifiers accepted in a provider config's `type` field.
 * "custom" means the caller supplies its own provider object
 * (see createProvider); the other three are handled by VercelAIProvider.
 */
var Providers = {
  google: "google",
  openai: "openai",
  anthropic: "anthropic",
  custom: "custom"
};

/**
 * LLM provider implemented on top of the Vercel AI SDK (`generateText` and
 * `Output` imported from "ai").
 *
 * The model ID is mandatory and fixed at construction time; `label`
 * ("<type>:<model>") is exposed so callers can report which model was used.
 */
var VercelAIProvider = class {
  /**
   * @param config - Provider config. Reads `type`, `model`, `apiKey`,
   *   `maxRetries` and `temperature`.
   * @throws {Error} If `type` is "custom" or `model` is missing, blank, or not a string
   */
  constructor(config) {
    this.config = config;
    if (config.type === "custom") {
      throw new Error(
        "VercelAIProvider does not support custom type. Use config.customProvider directly."
      );
    }
    // A non-string model would otherwise crash on `.trim()` with an unrelated TypeError.
    if (typeof config.model !== "string" || config.model.trim() === "") {
      throw new Error(
        `model is required for VercelAIProvider (type: "${config.type}"). No default is assumed.`
      );
    }
    this.model = config.model;
    this.label = `${config.type}:${config.model}`;
  }
  label;
  model;
  /**
   * Generate structured output using Vercel AI SDK's generateText with output.
   *
   * @param request - `{ messages, schema, temperature?, maxTokens? }`
   * @returns `{ data, model, usage: { inputTokens, outputTokens }, latencyMs }`
   */
  async generateStructured(request) {
    const model = await this.getModel();
    const startTime = Date.now();
    const { output, usage } = await generateText({
      model,
      messages: request.messages,
      output: Output.object({ schema: request.schema }),
      // Same fallback chain as generateText() below: per-call value, then provider config, then 0.
      temperature: request.temperature ?? this.config.temperature ?? 0,
      maxRetries: this.config.maxRetries ?? 0,
      // The SDK setting is named `maxOutputTokens`; a `maxTokens` key would be ignored.
      ...request.maxTokens !== void 0 ? { maxOutputTokens: request.maxTokens } : {}
    });
    return {
      data: output,
      model: this.model,
      usage: {
        inputTokens: usage?.inputTokens || 0,
        outputTokens: usage?.outputTokens || 0
      },
      latencyMs: Date.now() - startTime
    };
  }
  /**
   * Generate plain text using Vercel AI SDK's generateText.
   *
   * @param messages - Chat messages forwarded to the SDK
   * @param temperature - Optional per-call temperature; falls back to
   *   `config.temperature`, then 0
   * @returns `{ text, usage: { inputTokens, outputTokens }, latencyMs }`
   */
  async generateText(messages, temperature) {
    const model = await this.getModel();
    const startTime = Date.now();
    const { text, usage } = await generateText({
      model,
      messages,
      temperature: temperature ?? this.config.temperature ?? 0,
      maxRetries: this.config.maxRetries ?? 0
    });
    return {
      text,
      usage: {
        inputTokens: usage?.inputTokens || 0,
        outputTokens: usage?.outputTokens || 0
      },
      latencyMs: Date.now() - startTime
    };
  }
  /**
   * Get the configured language model.
   * Uses dynamic imports so consumers only need to install the provider packages they use.
   * The import specifiers stay literal strings so bundlers can still see them.
   *
   * @throws {Error} If the adapter package cannot be imported (original error
   *   attached as `cause`) or the provider type is not supported
   */
  async getModel() {
    const apiKey = this.config.apiKey;
    // Only pass `apiKey` when one was configured; otherwise hand the adapter an empty options object.
    const options = apiKey ? { apiKey } : {};
    switch (this.config.type) {
      case "openai": {
        const { createOpenAI } = await import('@ai-sdk/openai').catch((cause) => {
          throw new Error(
            "To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai",
            { cause }
          );
        });
        return createOpenAI(options)(this.model);
      }
      case "anthropic": {
        const { createAnthropic } = await import('@ai-sdk/anthropic').catch((cause) => {
          throw new Error(
            "To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic",
            { cause }
          );
        });
        return createAnthropic(options)(this.model);
      }
      case "google": {
        const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch((cause) => {
          throw new Error(
            "To use the Google provider, install its adapter: npm install @ai-sdk/google",
            { cause }
          );
        });
        return createGoogleGenerativeAI(options)(this.model);
      }
      default:
        throw new Error(`Unsupported provider type: ${this.config.type}`);
    }
  }
};

/**
 * Resolve a provider config to a provider instance.
 *
 * A "custom" config that carries `customProvider` returns that object as-is;
 * every other config is handed to VercelAIProvider (whose constructor rejects
 * "custom" without a `customProvider` and configs without a model).
 */
function createProvider(config) {
  if (config.type === "custom" && config.customProvider) {
    return config.customProvider;
  }
  return new VercelAIProvider(config);
}
184
305
  var SentenceAnalysisSchema = z.object({
185
306
  reasoning: z.string().describe("Step-by-step reasoning for the analysis"),
186
307
  // Foundational
@@ -340,6 +461,12 @@ var VALIDATION_LIMITS = {
340
461
  /** Maximum text length in characters (100K chars ≈ 25K tokens) */
341
462
  MAX_TEXT_LENGTH: 1e5
342
463
  };
/**
 * Enum-style map of the built-in provider identifiers. The values are the
 * strings used for `modelOverride.provider` and `defaultProviders`.
 */
var Provider = {
  OpenAI: "openai",
  Google: "google",
  Anthropic: "anthropic"
};
343
470
  var BaseEvaluator = class {
344
471
  telemetryClient;
345
472
  logger;
@@ -357,21 +484,35 @@ var BaseEvaluator = class {
357
484
  * name: 'My Evaluator',
358
485
  * description: 'Does something useful',
359
486
  * supportedGrades: ['3', '4', '5'],
360
- * requiresGoogleKey: true,
361
- * requiresOpenAIKey: false,
487
+ * defaultProviders: [Provider.Google],
362
488
  * };
363
489
  * }
364
490
  * ```
365
491
  */
366
492
  static metadata;
493
+ /**
494
+ * @throws {ConfigurationError} If the subclass has not defined static metadata
495
+ * @throws {ConfigurationError} If modelOverride has an invalid provider or empty model
496
+ * @throws {ConfigurationError} If a required API key is missing
497
+ */
367
498
  constructor(config) {
368
499
  this.logger = createLogger(config.logger, config.logLevel ?? 2 /* WARN */);
500
+ this.validateModelOverride(config);
369
501
  this.validateApiKeys(config);
370
502
  const telemetryConfig = this.normalizeTelemetryConfig(config.telemetry);
371
503
  this.config = {
372
504
  maxRetries: config.maxRetries ?? 2,
373
- telemetry: telemetryConfig
505
+ telemetry: telemetryConfig,
506
+ modelOverride: config.modelOverride,
507
+ googleApiKey: config.googleApiKey,
508
+ openaiApiKey: config.openaiApiKey,
509
+ anthropicApiKey: config.anthropicApiKey
374
510
  };
511
+ if (config.modelOverride) {
512
+ this.logger.warn(
513
+ `modelOverride is active: using ${config.modelOverride.provider}:${config.modelOverride.model} instead of the default model. Evaluation quality may differ from recommended defaults.`
514
+ );
515
+ }
375
516
  if (this.config.telemetry.enabled) {
376
517
  this.telemetryClient = new TelemetryClient({
377
518
  endpoint: "https://api.learningcommons.org/evaluators-telemetry/v1/events",
@@ -396,21 +537,62 @@ var BaseEvaluator = class {
396
537
  return meta;
397
538
  }
398
539
  /**
399
- * Validate that required API keys are provided based on metadata
400
- * @throws {ConfigurationError} If required API keys are missing
540
+ * Validate modelOverride shape: provider must be a known Provider value and
541
+ * model must be a non-empty string.
542
+ * @throws {ConfigurationError} If the override is malformed
401
543
  */
402
- validateApiKeys(config) {
403
- if (this.metadata.requiresGoogleKey && !config.googleApiKey) {
544
+ validateModelOverride(config) {
545
+ if (!config.modelOverride) return;
546
+ const validProviders = Object.values(Provider);
547
+ if (!validProviders.includes(config.modelOverride.provider)) {
404
548
  throw new ConfigurationError(
405
- `Google API key is required for ${this.metadata.name} evaluator. Pass googleApiKey in config.`
549
+ `Invalid provider "${config.modelOverride.provider}" in modelOverride. Valid providers are: ${validProviders.join(", ")}.`
406
550
  );
407
551
  }
408
- if (this.metadata.requiresOpenAIKey && !config.openaiApiKey) {
552
+ if (!config.modelOverride.model || config.modelOverride.model.trim() === "") {
409
553
  throw new ConfigurationError(
410
- `OpenAI API key is required for ${this.metadata.name} evaluator. Pass openaiApiKey in config.`
554
+ `modelOverride.model is required. Specify the model ID for provider "${config.modelOverride.provider}".`
411
555
  );
412
556
  }
413
557
  }
558
+ /**
559
+ * Validate that the required API key is present.
560
+ * When modelOverride is set, checks the override provider's key.
561
+ * Otherwise checks the keys required by the evaluator's default providers.
562
+ * @throws {ConfigurationError} If a required key is missing
563
+ */
564
+ validateApiKeys(config) {
565
+ const keyFor = {
566
+ ["openai" /* OpenAI */]: config.openaiApiKey?.trim() || void 0,
567
+ ["google" /* Google */]: config.googleApiKey?.trim() || void 0,
568
+ ["anthropic" /* Anthropic */]: config.anthropicApiKey?.trim() || void 0
569
+ };
570
+ const humanName = {
571
+ ["openai" /* OpenAI */]: "OpenAI API key",
572
+ ["google" /* Google */]: "Google API key",
573
+ ["anthropic" /* Anthropic */]: "Anthropic API key"
574
+ };
575
+ const configKey = {
576
+ ["openai" /* OpenAI */]: "openaiApiKey",
577
+ ["google" /* Google */]: "googleApiKey",
578
+ ["anthropic" /* Anthropic */]: "anthropicApiKey"
579
+ };
580
+ if (config.modelOverride) {
581
+ if (!keyFor[config.modelOverride.provider]) {
582
+ throw new ConfigurationError(
583
+ `${humanName[config.modelOverride.provider]} is required when using modelOverride with provider "${config.modelOverride.provider}". Pass ${configKey[config.modelOverride.provider]} in config.`
584
+ );
585
+ }
586
+ return;
587
+ }
588
+ for (const provider of this.metadata.defaultProviders) {
589
+ if (!keyFor[provider]) {
590
+ throw new ConfigurationError(
591
+ `${humanName[provider]} is required for ${this.metadata.name} evaluator. Pass ${configKey[provider]} in config.`
592
+ );
593
+ }
594
+ }
595
+ }
414
596
  /**
415
597
  * Normalize telemetry config to standard format
416
598
  */
@@ -484,13 +666,40 @@ var BaseEvaluator = class {
484
666
  const validList = Array.from(validGrades).sort((a, b) => {
485
667
  if (a === "K") return -1;
486
668
  if (b === "K") return 1;
487
- return parseInt(a) - parseInt(b);
669
+ return parseInt(a, 10) - parseInt(b, 10);
488
670
  }).join(", ");
489
671
  throw new ValidationError(
490
672
  `Invalid grade "${grade}". Supported grades for this evaluator: ${validList}`
491
673
  );
492
674
  }
493
675
  }
676
+ /**
677
+ * Create an LLM provider, honouring modelOverride if set.
678
+ * When override is active, the key for the override provider is resolved
679
+ * from the matching top-level config field (e.g. anthropicApiKey for Anthropic).
680
+ */
681
+ createConfiguredProvider(defaultType, defaultModel, defaultApiKey) {
682
+ const override = this.config.modelOverride;
683
+ if (override) {
684
+ const apiKeyFor = {
685
+ ["openai" /* OpenAI */]: this.config.openaiApiKey,
686
+ ["google" /* Google */]: this.config.googleApiKey,
687
+ ["anthropic" /* Anthropic */]: this.config.anthropicApiKey
688
+ };
689
+ return createProvider({
690
+ type: override.provider,
691
+ model: override.model,
692
+ apiKey: apiKeyFor[override.provider],
693
+ maxRetries: this.config.maxRetries
694
+ });
695
+ }
696
+ return createProvider({
697
+ type: defaultType,
698
+ model: defaultModel,
699
+ apiKey: defaultApiKey,
700
+ maxRetries: this.config.maxRetries
701
+ });
702
+ }
494
703
  /**
495
704
  * Send telemetry event to analytics service
496
705
  * Common helper for all evaluators
@@ -511,123 +720,12 @@ var BaseEvaluator = class {
511
720
  provider: params.provider,
512
721
  token_usage: params.tokenUsage,
513
722
  metadata: params.metadata,
723
+ model_override: this.config.modelOverride ? true : void 0,
514
724
  // Include input text only if recording is enabled
515
725
  input_text: this.config.telemetry.recordInputs ? params.inputText : void 0
516
726
  });
517
727
  }
518
728
  };
519
- var DEFAULT_MODELS = {
520
- openai: "gpt-4o",
521
- anthropic: "claude-sonnet-4-5-20250929",
522
- google: "gemini-2.5-pro"
523
- };
524
- var VercelAIProvider = class {
525
- constructor(config) {
526
- this.config = config;
527
- if (config.type === "custom") {
528
- throw new Error(
529
- "VercelAIProvider does not support custom type. Use config.customProvider directly."
530
- );
531
- }
532
- }
533
- /**
534
- * Generate structured output using Vercel AI SDK's generateText with output
535
- */
536
- async generateStructured(request) {
537
- const model = await this.getModel(request.model);
538
- const startTime = Date.now();
539
- const { output, usage } = await generateText({
540
- model,
541
- messages: request.messages,
542
- output: Output.object({ schema: request.schema }),
543
- temperature: request.temperature ?? 0,
544
- maxRetries: this.config.maxRetries ?? 0,
545
- ...request.maxTokens !== void 0 ? { maxTokens: request.maxTokens } : {}
546
- });
547
- return {
548
- data: output,
549
- model: request.model || this.getDefaultModel(),
550
- usage: {
551
- inputTokens: usage.inputTokens || 0,
552
- outputTokens: usage.outputTokens || 0
553
- },
554
- latencyMs: Date.now() - startTime
555
- };
556
- }
557
- /**
558
- * Generate plain text using Vercel AI SDK's generateText
559
- */
560
- async generateText(messages, temperature) {
561
- const model = await this.getModel();
562
- const startTime = Date.now();
563
- const { text, usage } = await generateText({
564
- model,
565
- messages,
566
- temperature: temperature ?? this.config.temperature ?? 0,
567
- maxRetries: this.config.maxRetries ?? 0
568
- });
569
- return {
570
- text,
571
- usage: {
572
- inputTokens: usage.inputTokens || 0,
573
- outputTokens: usage.outputTokens || 0
574
- },
575
- latencyMs: Date.now() - startTime
576
- };
577
- }
578
- /**
579
- * Get the configured language model.
580
- * Uses dynamic imports so consumers only need to install the provider packages they use.
581
- */
582
- async getModel(requestModel) {
583
- const modelId = requestModel || this.config.model || this.getDefaultModel();
584
- const apiKey = this.config.apiKey;
585
- switch (this.config.type) {
586
- case "openai": {
587
- const { createOpenAI } = await import('@ai-sdk/openai').catch(() => {
588
- throw new Error(
589
- "To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai"
590
- );
591
- });
592
- return createOpenAI(apiKey ? { apiKey } : {})(modelId);
593
- }
594
- case "anthropic": {
595
- const { createAnthropic } = await import('@ai-sdk/anthropic').catch(() => {
596
- throw new Error(
597
- "To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic"
598
- );
599
- });
600
- return createAnthropic(apiKey ? { apiKey } : {})(modelId);
601
- }
602
- case "google": {
603
- const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch(() => {
604
- throw new Error(
605
- "To use the Google provider, install its adapter: npm install @ai-sdk/google"
606
- );
607
- });
608
- return createGoogleGenerativeAI(apiKey ? { apiKey } : {})(modelId);
609
- }
610
- default:
611
- throw new Error(`Unsupported provider type: ${this.config.type}`);
612
- }
613
- }
614
- /**
615
- * Get default model for the configured provider
616
- */
617
- getDefaultModel() {
618
- const providerType = this.config.type;
619
- if (providerType === "custom") {
620
- throw new Error("Cannot get default model for custom provider type");
621
- }
622
- return DEFAULT_MODELS[providerType];
623
- }
624
- };
625
- function createProvider(config) {
626
- if (config.type === "custom" && config.customProvider) {
627
- return config.customProvider;
628
- }
629
- return new VercelAIProvider(config);
630
- }
631
729
  var VocabularyComplexitySchema = z.object({
632
730
  tier_2_words: z.string().describe("List of Tier 2 words (academic words)"),
633
731
  tier_3_words: z.string().describe("List of Tier 3 words (domain-specific)"),
@@ -821,6 +919,44 @@ function featuresToJSON(features, decimals = 1, castToInt = true) {
821
919
  }
822
920
  return JSON.stringify(payload, null, 2);
823
921
  }
/**
 * Adapters for the text-analysis libraries a preprocessing step may name.
 * Each adapter exposes `call(fnName, text)`, which invokes the named library
 * function on the text and returns its result.
 */
var LIBRARY_ADAPTERS = {
  "text-readability": {
    call(fnName, text) {
      const fn = textReadability[fnName];
      if (typeof fn !== "function") {
        throw new Error(`Function "${fnName}" not found in text-readability.`);
      }
      // Invoke with the library object as `this`, as a normal method call would.
      return fn.call(textReadability, text);
    }
  }
};

/**
 * Optional transforms applied to a preprocessing result, keyed by
 * `post_transform.type`. Each receives the value and the whole
 * `post_transform` object as its options.
 */
var POST_TRANSFORMS = {
  /** Round `value` to `precision` decimal places (default 0). */
  round(value, { precision = 0 } = {}) {
    const factor = 10 ** precision;
    return Math.round(value * factor) / factor;
  }
};

/**
 * Run one preprocessing step against `text`.
 *
 * @param text - Input text handed to the library function
 * @param impl - Step description: `{ library, function, post_transform? }`,
 *   where `post_transform` is `{ type, ...options }`
 * @returns The library result, post-transformed when `post_transform` is set
 * @throws {Error} If `impl.library` or `impl.post_transform.type` is not a
 *   registered (own) key of the tables above
 */
function runPreprocessingStep(text, impl) {
  // Own-property checks only: inherited keys such as "constructor" or
  // "toString" must not resolve to Object.prototype members.
  const adapter = Object.hasOwn(LIBRARY_ADAPTERS, impl.library) ? LIBRARY_ADAPTERS[impl.library] : void 0;
  if (!adapter) {
    const supported = Object.keys(LIBRARY_ADAPTERS).join(", ");
    throw new Error(
      `Unsupported preprocessing library "${impl.library}". Supported: ${supported}.`
    );
  }
  let result = adapter.call(impl.function, text);
  if (impl.post_transform) {
    const transformType = impl.post_transform.type;
    const transform = Object.hasOwn(POST_TRANSFORMS, transformType) ? POST_TRANSFORMS[transformType] : void 0;
    if (!transform) {
      const supported = Object.keys(POST_TRANSFORMS).join(", ");
      throw new Error(
        `Unsupported post_transform type "${transformType}". Supported: ${supported}.`
      );
    }
    result = transform(result, impl.post_transform);
  }
  return result;
}
824
960
 
825
961
  // ../../evals/prompts/vocabulary/background-knowledge.txt
826
962
  var background_knowledge_default = `
@@ -1126,32 +1262,28 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1126
1262
  name: "Vocabulary",
1127
1263
  description: "Evaluates vocabulary complexity of educational texts relative to grade level",
1128
1264
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
1129
- requiresGoogleKey: true,
1130
- requiresOpenAIKey: true
1265
+ defaultProviders: ["google" /* Google */, "openai" /* OpenAI */]
1131
1266
  };
1132
1267
  grades34ComplexityProvider;
1133
1268
  otherGradesComplexityProvider;
1134
1269
  backgroundKnowledgeProvider;
1135
1270
  constructor(config) {
1136
1271
  super(config);
1137
- this.grades34ComplexityProvider = createProvider({
1138
- type: "google",
1139
- model: "gemini-2.5-pro",
1140
- apiKey: config.googleApiKey,
1141
- maxRetries: this.config.maxRetries
1142
- });
1143
- this.otherGradesComplexityProvider = createProvider({
1144
- type: "openai",
1145
- model: "gpt-4.1-2025-04-14",
1146
- apiKey: config.openaiApiKey,
1147
- maxRetries: this.config.maxRetries
1148
- });
1149
- this.backgroundKnowledgeProvider = createProvider({
1150
- type: "openai",
1151
- model: "gpt-4o-2024-11-20",
1152
- apiKey: config.openaiApiKey,
1153
- maxRetries: this.config.maxRetries
1154
- });
1272
+ this.grades34ComplexityProvider = this.createConfiguredProvider(
1273
+ "google" /* Google */,
1274
+ "gemini-2.5-pro",
1275
+ config.googleApiKey
1276
+ );
1277
+ this.otherGradesComplexityProvider = this.createConfiguredProvider(
1278
+ "openai" /* OpenAI */,
1279
+ "gpt-4.1-2025-04-14",
1280
+ config.openaiApiKey
1281
+ );
1282
+ this.backgroundKnowledgeProvider = this.createConfiguredProvider(
1283
+ "openai" /* OpenAI */,
1284
+ "gpt-4o-2024-11-20",
1285
+ config.openaiApiKey
1286
+ );
1155
1287
  }
1156
1288
  /**
1157
1289
  * Evaluate vocabulary complexity for a given text and grade level
@@ -1160,6 +1292,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1160
1292
  * @param grade - The target grade level (3-12)
1161
1293
  * @returns Evaluation result with complexity score and detailed analysis
1162
1294
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
1295
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1163
1296
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1164
1297
  */
1165
1298
  async evaluate(text, grade) {
@@ -1171,7 +1304,9 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1171
1304
  });
1172
1305
  const startTime = Date.now();
1173
1306
  const stageDetails = [];
1174
- const complexityProviderName = grade === "3" || grade === "4" ? "google:gemini-2.5-pro" : "openai:gpt-4.1-2025-04-14";
1307
+ const complexityProviderLabel = grade === "3" || grade === "4" ? this.grades34ComplexityProvider.label : this.otherGradesComplexityProvider.label;
1308
+ const backgroundProviderLabel = this.backgroundKnowledgeProvider.label;
1309
+ const modelLabel = this.config.modelOverride ? backgroundProviderLabel : `${backgroundProviderLabel}+${complexityProviderLabel}`;
1175
1310
  try {
1176
1311
  this.validateText(text);
1177
1312
  this.validateGrade(grade, new Set(_VocabularyEvaluator.metadata.supportedGrades));
@@ -1182,7 +1317,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1182
1317
  const bgResponse = await this.getBackgroundKnowledgeAssumption(text, grade);
1183
1318
  stageDetails.push({
1184
1319
  stage: "background_knowledge",
1185
- provider: "openai:gpt-4o-2024-11-20",
1320
+ provider: backgroundProviderLabel,
1186
1321
  latency_ms: bgResponse.latencyMs,
1187
1322
  token_usage: {
1188
1323
  input_tokens: bgResponse.usage.inputTokens,
@@ -1198,7 +1333,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1198
1333
  );
1199
1334
  stageDetails.push({
1200
1335
  stage: "complexity_evaluation",
1201
- provider: complexityProviderName,
1336
+ provider: complexityProviderLabel,
1202
1337
  latency_ms: complexityResponse.latencyMs,
1203
1338
  token_usage: {
1204
1339
  input_tokens: complexityResponse.usage.inputTokens,
@@ -1214,7 +1349,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1214
1349
  score: complexityResponse.data.complexity_score,
1215
1350
  reasoning: complexityResponse.data.reasoning,
1216
1351
  metadata: {
1217
- model: `openai:gpt-4o-2024-11-20 + ${complexityProviderName}`,
1352
+ model: modelLabel,
1218
1353
  processingTimeMs: latencyMs
1219
1354
  },
1220
1355
  _internal: complexityResponse.data
@@ -1224,7 +1359,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1224
1359
  latencyMs,
1225
1360
  textLength: text.length,
1226
1361
  grade,
1227
- provider: `openai:gpt-4o-2024-11-20 + ${complexityProviderName}`,
1362
+ provider: modelLabel,
1228
1363
  tokenUsage: totalTokenUsage,
1229
1364
  metadata: {
1230
1365
  stage_details: stageDetails
@@ -1259,7 +1394,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1259
1394
  latencyMs,
1260
1395
  textLength: text.length,
1261
1396
  grade,
1262
- provider: `openai:gpt-4o-2024-11-20 + ${complexityProviderName}`,
1397
+ provider: modelLabel,
1263
1398
  tokenUsage: totalTokenUsage,
1264
1399
  errorCode: error instanceof Error ? error.name : "UnknownError",
1265
1400
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -1430,25 +1565,12 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1430
1565
  name: "Sentence Structure",
1431
1566
  description: "Evaluates sentence structure complexity based on grammatical features",
1432
1567
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
1433
- requiresGoogleKey: false,
1434
- requiresOpenAIKey: true
1568
+ defaultProviders: ["openai" /* OpenAI */]
1435
1569
  };
1436
- analysisProvider;
1437
- complexityProvider;
1570
+ provider;
1438
1571
  constructor(config) {
1439
1572
  super(config);
1440
- this.analysisProvider = createProvider({
1441
- type: "openai",
1442
- model: "gpt-4o",
1443
- apiKey: config.openaiApiKey,
1444
- maxRetries: this.config.maxRetries
1445
- });
1446
- this.complexityProvider = createProvider({
1447
- type: "openai",
1448
- model: "gpt-4o",
1449
- apiKey: config.openaiApiKey,
1450
- maxRetries: this.config.maxRetries
1451
- });
1573
+ this.provider = this.createConfiguredProvider("openai" /* OpenAI */, "gpt-4o", config.openaiApiKey);
1452
1574
  }
1453
1575
  /**
1454
1576
  * Evaluate sentence structure complexity for a given text and grade level
@@ -1457,6 +1579,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1457
1579
  * @param grade - The target grade level (3-12)
1458
1580
  * @returns Evaluation result with complexity score and detailed analysis
1459
1581
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
1582
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1460
1583
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1461
1584
  */
1462
1585
  async evaluate(text, grade) {
@@ -1478,7 +1601,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1478
1601
  const analysisResponse = await this.analyzeSentenceStructure(text);
1479
1602
  stageDetails.push({
1480
1603
  stage: "sentence_analysis",
1481
- provider: "openai:gpt-4o",
1604
+ provider: this.provider.label,
1482
1605
  latency_ms: analysisResponse.latencyMs,
1483
1606
  token_usage: {
1484
1607
  input_tokens: analysisResponse.usage.inputTokens,
@@ -1493,7 +1616,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1493
1616
  const complexityResponse = await this.classifyComplexity(features, grade, text);
1494
1617
  stageDetails.push({
1495
1618
  stage: "complexity_classification",
1496
- provider: "openai:gpt-4o",
1619
+ provider: this.provider.label,
1497
1620
  latency_ms: complexityResponse.latencyMs,
1498
1621
  token_usage: {
1499
1622
  input_tokens: complexityResponse.usage.inputTokens,
@@ -1509,7 +1632,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1509
1632
  score: complexityResponse.data.answer,
1510
1633
  reasoning: complexityResponse.data.reasoning,
1511
1634
  metadata: {
1512
- model: "openai:gpt-4o",
1635
+ model: this.provider.label,
1513
1636
  processingTimeMs: latencyMs
1514
1637
  },
1515
1638
  _internal: {
@@ -1523,7 +1646,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1523
1646
  latencyMs,
1524
1647
  textLength: text.length,
1525
1648
  grade,
1526
- provider: "openai:gpt-4o",
1649
+ provider: this.provider.label,
1527
1650
  tokenUsage: totalTokenUsage,
1528
1651
  metadata: {
1529
1652
  stage_details: stageDetails
@@ -1558,7 +1681,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1558
1681
  latencyMs,
1559
1682
  textLength: text.length,
1560
1683
  grade,
1561
- provider: "openai:gpt-4o",
1684
+ provider: this.provider.label,
1562
1685
  tokenUsage: totalTokenUsage,
1563
1686
  errorCode: error instanceof Error ? error.name : "UnknownError",
1564
1687
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -1586,7 +1709,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1586
1709
  `flesch_kincaid_grade: ${metrics.fleschKincaidGrade}`
1587
1710
  ].join("\n");
1588
1711
  const userPrompt = getUserPromptAnalysis(text, gtCountsStr);
1589
- const response = await this.analysisProvider.generateStructured({
1712
+ const response = await this.provider.generateStructured({
1590
1713
  messages: [
1591
1714
  { role: "system", content: getSystemPromptAnalysis() },
1592
1715
  { role: "user", content: userPrompt }
@@ -1608,7 +1731,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1608
1731
  async classifyComplexity(features, grade, excerpt) {
1609
1732
  const featuresJSON = featuresToJSON(features, 1, true);
1610
1733
  const userPrompt = getUserPromptComplexity(featuresJSON, grade, excerpt);
1611
- const response = await this.complexityProvider.generateStructured({
1734
+ const response = await this.provider.generateStructured({
1612
1735
  messages: [
1613
1736
  { role: "system", content: getSystemPromptComplexity() },
1614
1737
  { role: "user", content: userPrompt }
@@ -1659,18 +1782,16 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1659
1782
  description: "Determines appropriate grade level for text with scaffolding recommendations",
1660
1783
  supportedGrades: [],
1661
1784
  // No grade parameter required - evaluates what grade the text is appropriate for
1662
- requiresGoogleKey: true,
1663
- requiresOpenAIKey: false
1785
+ defaultProviders: ["google" /* Google */]
1664
1786
  };
1665
1787
  provider;
1666
1788
  constructor(config) {
1667
1789
  super(config);
1668
- this.provider = createProvider({
1669
- type: "google",
1670
- model: "gemini-2.5-pro",
1671
- apiKey: config.googleApiKey,
1672
- maxRetries: this.config.maxRetries
1673
- });
1790
+ this.provider = this.createConfiguredProvider(
1791
+ "google" /* Google */,
1792
+ "gemini-2.5-pro",
1793
+ config.googleApiKey
1794
+ );
1674
1795
  }
1675
1796
  /**
1676
1797
  * Evaluate grade level appropriateness for a given text
@@ -1678,6 +1799,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1678
1799
  * @param text - The text to evaluate
1679
1800
  * @returns Evaluation result with grade recommendations and scaffolding suggestions
1680
1801
  * @throws {ValidationError} If text is empty or too short/long
1802
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1681
1803
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1682
1804
  */
1683
1805
  async evaluate(text) {
@@ -1711,7 +1833,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1711
1833
  score: response.data.grade,
1712
1834
  reasoning: response.data.reasoning,
1713
1835
  metadata: {
1714
- model: "google:gemini-2.5-pro",
1836
+ model: this.provider.label,
1715
1837
  processingTimeMs: latencyMs
1716
1838
  },
1717
1839
  _internal: response.data
@@ -1720,7 +1842,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1720
1842
  status: "success",
1721
1843
  latencyMs,
1722
1844
  textLength: text.length,
1723
- provider: "google:gemini-2.5-pro",
1845
+ provider: this.provider.label,
1724
1846
  tokenUsage,
1725
1847
  // No metadata.stage_details for single-stage evaluator
1726
1848
  inputText: text
@@ -1745,7 +1867,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1745
1867
  status: "error",
1746
1868
  latencyMs,
1747
1869
  textLength: text.length,
1748
- provider: "google:gemini-2.5-pro",
1870
+ provider: this.provider.label,
1749
1871
  errorCode: error instanceof Error ? error.name : "UnknownError",
1750
1872
  inputText: text
1751
1873
  }).catch(() => {
@@ -1860,18 +1982,16 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1860
1982
  name: "Subject Matter Knowledge",
1861
1983
  description: "Evaluates background knowledge demands of educational texts relative to grade level",
1862
1984
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
1863
- requiresGoogleKey: true,
1864
- requiresOpenAIKey: false
1985
+ defaultProviders: ["google" /* Google */]
1865
1986
  };
1866
1987
  provider;
1867
1988
  constructor(config) {
1868
1989
  super(config);
1869
- this.provider = createProvider({
1870
- type: "google",
1871
- model: "gemini-3-flash-preview",
1872
- apiKey: config.googleApiKey,
1873
- maxRetries: this.config.maxRetries
1874
- });
1990
+ this.provider = this.createConfiguredProvider(
1991
+ "google" /* Google */,
1992
+ "gemini-3-flash-preview",
1993
+ config.googleApiKey
1994
+ );
1875
1995
  }
1876
1996
  /**
1877
1997
  * Evaluate subject matter knowledge complexity for a given text and grade level
@@ -1880,6 +2000,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1880
2000
  * @param grade - The target grade level (3-12)
1881
2001
  * @returns Evaluation result with complexity score and detailed analysis
1882
2002
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
2003
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1883
2004
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1884
2005
  */
1885
2006
  async evaluate(text, grade) {
@@ -1902,7 +2023,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1902
2023
  const response = await this.evaluateSmk(text, grade, fkScore);
1903
2024
  stageDetails.push({
1904
2025
  stage: "smk_evaluation",
1905
- provider: "google:gemini-3-flash-preview",
2026
+ provider: this.provider.label,
1906
2027
  latency_ms: response.latencyMs,
1907
2028
  token_usage: {
1908
2029
  input_tokens: response.usage.inputTokens,
@@ -1918,7 +2039,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1918
2039
  score: response.data.complexity_score,
1919
2040
  reasoning: response.data.reasoning,
1920
2041
  metadata: {
1921
- model: "google:gemini-3-flash-preview",
2042
+ model: this.provider.label,
1922
2043
  processingTimeMs: latencyMs
1923
2044
  },
1924
2045
  _internal: response.data
@@ -1928,7 +2049,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1928
2049
  latencyMs,
1929
2050
  textLength: text.length,
1930
2051
  grade,
1931
- provider: "google:gemini-3-flash-preview",
2052
+ provider: this.provider.label,
1932
2053
  tokenUsage: totalTokenUsage,
1933
2054
  metadata: {
1934
2055
  stage_details: stageDetails
@@ -1963,7 +2084,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1963
2084
  latencyMs,
1964
2085
  textLength: text.length,
1965
2086
  grade,
1966
- provider: "google:gemini-3-flash-preview",
2087
+ provider: this.provider.label,
1967
2088
  tokenUsage: totalTokenUsage,
1968
2089
  errorCode: error instanceof Error ? error.name : "UnknownError",
1969
2090
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -2071,18 +2192,16 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2071
2192
  name: "Conventionality",
2072
2193
  description: "Evaluates how explicit, literal, and straightforward a text's meaning is relative to grade level",
2073
2194
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
2074
- requiresGoogleKey: true,
2075
- requiresOpenAIKey: false
2195
+ defaultProviders: ["google" /* Google */]
2076
2196
  };
2077
2197
  provider;
2078
2198
  constructor(config) {
2079
2199
  super(config);
2080
- this.provider = createProvider({
2081
- type: "google",
2082
- model: "gemini-3-flash-preview",
2083
- apiKey: config.googleApiKey,
2084
- maxRetries: this.config.maxRetries
2085
- });
2200
+ this.provider = this.createConfiguredProvider(
2201
+ "google" /* Google */,
2202
+ "gemini-3-flash-preview",
2203
+ config.googleApiKey
2204
+ );
2086
2205
  }
2087
2206
  /**
2088
2207
  * Evaluate conventionality complexity for a given text and grade level
@@ -2091,6 +2210,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2091
2210
  * @param grade - The target grade level (3-12)
2092
2211
  * @returns Evaluation result with complexity score and detailed analysis
2093
2212
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
2213
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
2094
2214
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
2095
2215
  */
2096
2216
  async evaluate(text, grade) {
@@ -2113,7 +2233,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2113
2233
  const response = await this.evaluateConventionality(text, grade, fkScore);
2114
2234
  stageDetails.push({
2115
2235
  stage: "conventionality_evaluation",
2116
- provider: "google:gemini-3-flash-preview",
2236
+ provider: this.provider.label,
2117
2237
  latency_ms: response.latencyMs,
2118
2238
  token_usage: {
2119
2239
  input_tokens: response.usage.inputTokens,
@@ -2129,7 +2249,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2129
2249
  score: response.data.complexity_score,
2130
2250
  reasoning: response.data.reasoning,
2131
2251
  metadata: {
2132
- model: "google:gemini-3-flash-preview",
2252
+ model: this.provider.label,
2133
2253
  processingTimeMs: latencyMs
2134
2254
  },
2135
2255
  _internal: response.data
@@ -2139,7 +2259,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2139
2259
  latencyMs,
2140
2260
  textLength: text.length,
2141
2261
  grade,
2142
- provider: "google:gemini-3-flash-preview",
2262
+ provider: this.provider.label,
2143
2263
  tokenUsage: totalTokenUsage,
2144
2264
  metadata: {
2145
2265
  stage_details: stageDetails
@@ -2174,7 +2294,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2174
2294
  latencyMs,
2175
2295
  textLength: text.length,
2176
2296
  grade,
2177
- provider: "google:gemini-3-flash-preview",
2297
+ provider: this.provider.label,
2178
2298
  tokenUsage: totalTokenUsage,
2179
2299
  errorCode: error instanceof Error ? error.name : "UnknownError",
2180
2300
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -2216,8 +2336,7 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
2216
2336
  name: "Text Complexity",
2217
2337
  description: "Composite evaluator analyzing vocabulary, sentence structure, subject matter knowledge, and conventionality complexity",
2218
2338
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
2219
- requiresGoogleKey: true,
2220
- requiresOpenAIKey: true
2339
+ defaultProviders: ["google" /* Google */, "openai" /* OpenAI */]
2221
2340
  };
2222
2341
  vocabularyEvaluator;
2223
2342
  sentenceStructureEvaluator;
@@ -2242,7 +2361,8 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
2242
2361
  * @param text - The text to evaluate
2243
2362
  * @param grade - The target grade level (3-12)
2244
2363
  * @returns Map of sub-evaluator results
2245
- * @throws {ValidationError} If text is empty or grade is invalid
2364
+ * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
2365
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
2246
2366
  * @throws {Error} If all sub-evaluators fail
2247
2367
  */
2248
2368
  async evaluate(text, grade) {
@@ -2289,7 +2409,7 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
2289
2409
  latencyMs,
2290
2410
  textLength: text.length,
2291
2411
  grade,
2292
- provider: "composite:google+openai",
2412
+ provider: this.config.modelOverride ? `${this.config.modelOverride.provider}:${this.config.modelOverride.model}` : "composite:google+openai",
2293
2413
  errorCode: hasFailures ? "PartialFailure" : void 0,
2294
2414
  inputText: text
2295
2415
  }).catch(() => {
@@ -2320,6 +2440,278 @@ async function evaluateTextComplexity(text, grade, config) {
2320
2440
  return evaluator.evaluate(text, grade);
2321
2441
  }
2322
2442
 
2323
- export { APIError, AuthenticationError, ComplexityClassificationSchema, ConfigurationError, ConventionalityEvaluator, EvaluatorError, GradeBand, GradeLevelAppropriatenessEvaluator, GradeLevelAppropriatenessSchema, LogLevel, NetworkError, RateLimitError, SentenceAnalysisSchema, SentenceStructureEvaluator, SmkEvaluator, TextComplexityEvaluator, TextComplexityLevel, TimeoutError, ValidationError, VocabularyEvaluator, addEngineeredFeatures, calculateFleschKincaidGrade, calculateReadabilityMetrics, evaluateConventionality, evaluateGradeLevelAppropriateness, evaluateSentenceStructure, evaluateSmk, evaluateTextComplexity, evaluateVocabulary, featuresToJSON };
2443
+ // ../../evals/prompts/purpose/system.txt
2444
+ var system_default4 = '\n Role\n You are an expert reading assessment evaluator. Your task is to determine the Text Complexity of a given passage based exclusively on the Purpose dimension of the qualitative measures rubric.\n\n Task Details\n You will be provided with an informational or literary `text`, along with its `grade_level` and `fk_score` (Flesch-Kincaid). You must analyze the text and determine how difficult it is for a reader to identify the author\'s purpose. \n\n Crucially, you must distinguish between the text\'s *topic* (what it is about) and its *purpose* (why the author wrote it). \n\n Rubric: Purpose Complexity\n Exceedingly Complex: Subtle and intricate, difficult to determine; includes many theoretical or abstract elements.\n Very Complex: Implicit or subtle but fairly easy to infer; more theoretical or abstract than concrete.\n Moderately Complex: Implied but easy to identify based upon context or source.\n Slightly Complex: Explicitly stated, clear, concrete, narrowly focused.\n More Context Needed: The text is a fragment or lacks necessary introductory context, making the true purpose impossible to determine accurately without external background knowledge.\n\n Expert Rules for Evaluating Purpose\n Based on expert consensus and historical grading corrections, you must apply the following heuristics:\n\n 1. The "Slightly Complex" Benchmark (Straightforward and Explicit)\n A text is Slightly Complex if its purpose is explicitly stated or if its informative intent is straightforward, clear, concrete, and directly answers what the text is immediately about. If the text opens by clearly identifying a concrete topic (e.g., "Pins are made of either brass or iron wire") and rigidly follows through by explaining factual, practical information or a process (like manufacturing steps or geographic facts), the purpose is considered explicit and straightforward. It does *not* require a literal statement like "The purpose of this text is to..." 
as long as the delivery of information is direct, clear, and unadorned by persuasive elements or complex framing.\n\n 2. Moderately Complex via Guiding Questions & Inquiry Formats\n If a text begins with a general introduction and uses guiding questions (e.g., "Have you ever wondered how clouds are formed?") to transition into an explanation, the purpose is implied rather than explicitly stated upfront. Because the reader must recognize the question as the pivot point for the author\'s intent, it is Moderately Complex.\n\n 3. Moderately Complex via Multiple Distinct Informational Goals\n If a text covers a broad topic but jumps between several distinct scientific or informational objectives without an overarching framing device or explicit thesis (e.g., talking about measuring ice sheets, then mapping, then finding meteorites), the reader must synthesize these diverse facts to recognize the broader purpose, making it Moderately Complex.\n\n 4. Moderately Complex via Arguments Disguised as Information\n If an author is arguing a specific point, correcting a misconception, or defending a stance, but the text could initially be mistaken by students as purely informative factual text, it is Moderately Complex. The reader must infer the persuasive intent or argumentative purpose beneath the informative tone.\n\n 5. "More Context Needed" for Fragments\n If a text is a fragment missing a crucial introduction or context, and identifying the author\'s purpose beyond a simple surface-level description would be exceptionally difficult for a reader in the target grade level without that external background, score it as `more_context_needed`. 
\n\n Output Format\n Provide your evaluation in the following structure:\n reasoning:\n - Surface Analysis: Identify if the text clearly identifies its topic and delivers straightforward facts, or if it utilizes structural cues, titles, or direct thesis statements.\n - Subtlety & Framing: Is the informative purpose straightforward and concrete? Does it use guiding questions? Is it an argument disguised as pure information? Are there multiple distinct informational goals requiring synthesis?\n - Context Check: Is this text a fragment missing crucial context that obscures the deeper purpose for the target grade level?\n - Rubric Alignment: Explain how the text aligns with the specific language of the rubric, explicitly referencing the expert rules above. Justify why it isn\'t one level simpler or more complex.\n\n answer:\n - complexity_score: (slightly_complex, moderately_complex, very_complex, exceedingly_complex, more_context_needed)\n - reasoning: A brief summary of your final decision.\n - details: Structured breakdown of PurposeDetails including detailed_summary, adjustment_and_scaffolding, and recommended_use_cases.\n';
2445
+
2446
+ // ../../evals/prompts/purpose/user.txt
2447
+ var user_default4 = "Analyze:\nText: {text}\nGrade: {grade_level}\nFK Score: {fk_score}";
2448
+
2449
+ // ../../evals/prompts/purpose/config.json
2450
+ var config_default = {
2451
+ evaluator: {
2452
+ id: "literacy.gla.purpose",
2453
+ name: "Purpose Dimension Text Complexity Evaluator",
2454
+ description: "Evaluates the Purpose dimension of qualitative text complexity for K-12 reading assessment, producing a 5-level rubric rating with structured pedagogical detail."
2455
+ },
2456
+ preprocessing: [
2457
+ {
2458
+ id: "fk_score",
2459
+ kind: "flesch_kincaid_grade",
2460
+ description: "Compute the Flesch-Kincaid Grade Level for the input text and bind it to {fk_score} in the prompt.",
2461
+ input: "text",
2462
+ output: "fk_score",
2463
+ implementation: {
2464
+ python: {
2465
+ library: "textstat",
2466
+ function: "flesch_kincaid_grade",
2467
+ post_transform: {
2468
+ type: "round",
2469
+ precision: 2
2470
+ }
2471
+ },
2472
+ typescript: {
2473
+ library: "text-readability",
2474
+ function: "fleschKincaidGrade",
2475
+ post_transform: {
2476
+ type: "round",
2477
+ precision: 2
2478
+ }
2479
+ }
2480
+ }
2481
+ }
2482
+ ],
2483
+ steps: [
2484
+ {
2485
+ id: "evaluate_purpose",
2486
+ description: "Single-call LLM step that produces the EvaluatorOutput JSON.",
2487
+ prompt: {
2488
+ type: "chat",
2489
+ messages: [
2490
+ {
2491
+ role: "system",
2492
+ source_path: "system.txt",
2493
+ sha256: "745b95b7d54dc845b99363c9d3360355381883c22a5f6a0f305d7349cae38a54"
2494
+ },
2495
+ {
2496
+ role: "user",
2497
+ source_path: "user.txt",
2498
+ sha256: "cd8e6347db1a55d104e34436f8f66e833bd6583645d4786a554aaefdd26479b2"
2499
+ }
2500
+ ],
2501
+ placeholders: {
2502
+ text: {
2503
+ required: true,
2504
+ source: "input"
2505
+ },
2506
+ grade_level: {
2507
+ required: true,
2508
+ source: "input"
2509
+ },
2510
+ fk_score: {
2511
+ required: true,
2512
+ source: "preprocessing.fk_score"
2513
+ }
2514
+ }
2515
+ },
2516
+ model: {
2517
+ provider: "google",
2518
+ name: "gemini-3-flash-preview"
2519
+ },
2520
+ generation: {
2521
+ temperature: 0
2522
+ },
2523
+ parser: {
2524
+ kind: "structured_output"
2525
+ },
2526
+ output_binding: "formatted_output"
2527
+ }
2528
+ ]};
2529
+
2530
+ // src/prompts/purpose/index.ts
2531
// The prompt step id is "evaluate_" followed by the last dot-separated segment
// of the evaluator id declared in the purpose config.
var STEP_ID = `evaluate_${config_default.evaluator.id.split(".").slice(-1)[0]}`;
var _step = config_default.steps.find(({ id }) => id === STEP_ID);
// Fail while the module loads if the config does not contain the expected step.
if (!_step) {
  throw new Error(`Step "${STEP_ID}" not found in purpose config.json`);
}
// Names of the `{placeholder}` tokens declared for that step's prompt.
var PLACEHOLDER_KEYS = Object.keys(_step.prompt.placeholders);
2535
/**
 * Fill `{key}` placeholders in a prompt template.
 *
 * A placeholder is substituted only when its key is listed in PLACEHOLDER_KEYS
 * and is present in `inputs`; any other `{...}` sequence is left untouched.
 *
 * The template is scanned exactly once with a function replacer, which gives
 * two guarantees the previous chained `replaceAll` calls did not:
 *  - substituted values are inserted verbatim, so `$$`, `$&` and similar
 *    replacement patterns occurring in the input text are not interpreted;
 *  - substituted values are never re-scanned, so input text that happens to
 *    contain `{grade_level}` or `{fk_score}` is not itself substituted.
 *
 * @param {string} template - Prompt template containing `{key}` tokens.
 * @param {Object} inputs - Placeholder values keyed by placeholder name.
 * @returns {string} The template with every known, supplied placeholder filled in.
 */
function applyPlaceholders(template, inputs) {
  return template.replace(/\{([^{}]+)\}/g, (token, key) => {
    const isKnown = PLACEHOLDER_KEYS.includes(key) && key in inputs;
    // Returning the matched token leaves unknown or missing placeholders as-is.
    return isKnown ? String(inputs[key]) : token;
  });
}
2541
/**
 * Render the Purpose system prompt.
 *
 * @param {Object} inputs - Placeholder values keyed by placeholder name.
 * @returns {string} The system prompt template after placeholder substitution.
 */
function getSystemPrompt5(inputs) {
  const renderedSystemPrompt = applyPlaceholders(system_default4, inputs);
  return renderedSystemPrompt;
}
2544
/**
 * Render the Purpose user prompt.
 *
 * @param {Object} inputs - Placeholder values keyed by placeholder name.
 * @returns {string} The user prompt template after placeholder substitution.
 */
function getUserPrompt5(inputs) {
  const renderedUserPrompt = applyPlaceholders(user_default4, inputs);
  return renderedUserPrompt;
}
2547
+
2548
+ // ../../evals/prompts/purpose/input_schema.json
2549
+ var input_schema_default = {
2550
+ properties: {
2551
+ grade_level: {
2552
+ minimum: 3,
2553
+ maximum: 12}
2554
+ }
2555
+ };
2556
+
2557
+ // src/evaluators/purpose.ts
2558
// Resolve the single LLM step of the purpose config: its id is "evaluate_"
// followed by the last dot-separated segment of the evaluator id.
var STEP_ID2 = `evaluate_${config_default.evaluator.id.split(".").slice(-1)[0]}`;
var _step2 = config_default.steps.find(({ id }) => id === STEP_ID2);
// Fail while the module loads if the config does not contain the expected step.
if (!_step2) {
  throw new Error(`Step "${STEP_ID2}" not found in purpose config.json`);
}
var STEP = _step2;
// Grade bounds come from the input schema so the evaluator and schema agree.
var { minimum: GRADE_MIN, maximum: GRADE_MAX } = input_schema_default.properties.grade_level;
// Every supported grade as a string, from GRADE_MIN through GRADE_MAX inclusive.
var SUPPORTED_GRADES = [];
for (let gradeValue = GRADE_MIN; gradeValue <= GRADE_MAX; gradeValue += 1) {
  SUPPORTED_GRADES.push(String(gradeValue));
}
// Maps the schema's complexity_score enum values to display labels.
var COMPLEXITY_SCORE_DISPLAY = {
  slightly_complex: "Slightly complex",
  moderately_complex: "Moderately complex",
  very_complex: "Very complex",
  exceedingly_complex: "Exceedingly complex",
  more_context_needed: "More context needed"
};
2572
/**
 * Evaluates the Purpose dimension of text complexity with a single structured
 * LLM call. Identity, model name and temperature are read from the bundled
 * purpose config; the supported grade range comes from the input schema.
 */
var PurposeEvaluator = class _PurposeEvaluator extends BaseEvaluator {
  static metadata = {
    id: config_default.evaluator.id,
    name: config_default.evaluator.name,
    description: config_default.evaluator.description,
    supportedGrades: SUPPORTED_GRADES,
    defaultProviders: ["google" /* Google */]
  };
  static TEMPERATURE = STEP.generation.temperature;
  /**
   * Run the "fk_score" preprocessing step from the purpose config on the text,
   * using the step's TypeScript implementation descriptor.
   *
   * @param text - The text to score
   * @returns Whatever runPreprocessingStep returns for that descriptor
   * @throws {Error} If the config has no "fk_score" preprocessing step
   */
  static computeFkScore(text) {
    const fkStep = config_default.preprocessing.find((p) => p.id === "fk_score");
    if (!fkStep) throw new Error("fk_score preprocessing step not found in purpose config.json");
    return runPreprocessingStep(text, fkStep.implementation.typescript);
  }
  provider;
  constructor(config) {
    super(config);
    this.provider = this.createConfiguredProvider(
      "google" /* Google */,
      STEP.model.name,
      config.googleApiKey
    );
  }
  /**
   * Evaluate purpose complexity for a given text and grade level
   *
   * @param text - The text to evaluate
   * @param grade - The target grade level (3-12), as a string such as "5" or an integer number
   * @returns Evaluation result with complexity score and detailed analysis
   * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
   * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
   * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
   */
  async evaluate(text, grade) {
    this.logger.info("Starting Purpose evaluation", {
      evaluator: _PurposeEvaluator.metadata.id,
      operation: "evaluate",
      grade,
      textLength: text.length
    });
    const startTime = Date.now();
    const stageDetails = [];
    try {
      this.validateText(text);
      const gradeNum = this.parseAndValidateGrade(grade);
      const fkScore = _PurposeEvaluator.computeFkScore(text);
      // Prompt placeholders are substituted as strings.
      const inputs = {
        text,
        grade_level: String(gradeNum),
        fk_score: String(fkScore)
      };
      const response = await this.callLLM(inputs);
      const latencyMs = Date.now() - startTime;
      const tokenUsage = {
        input_tokens: response.usage.inputTokens,
        output_tokens: response.usage.outputTokens
      };
      stageDetails.push({
        stage: STEP.id,
        provider: this.provider.label,
        latency_ms: response.latencyMs,
        token_usage: tokenUsage
      });
      const result = {
        // Public score is the display label; the raw enum value stays in _internal.
        score: COMPLEXITY_SCORE_DISPLAY[response.data.complexity_score],
        reasoning: response.data.reasoning,
        metadata: {
          model: this.provider.label,
          processingTimeMs: latencyMs
        },
        _internal: response.data
      };
      // Telemetry is best-effort: a rejected send must not fail the evaluation.
      this.sendTelemetry({
        status: "success",
        latencyMs,
        textLength: text.length,
        grade: String(gradeNum),
        provider: this.provider.label,
        tokenUsage,
        metadata: { stage_details: stageDetails },
        inputText: text
      }).catch(() => void 0);
      this.logger.info("Purpose evaluation completed successfully", {
        evaluator: _PurposeEvaluator.metadata.id,
        operation: "evaluate",
        grade: gradeNum,
        score: result.score,
        processingTimeMs: latencyMs
      });
      return result;
    } catch (error) {
      const latencyMs = Date.now() - startTime;
      this.logger.error("Purpose evaluation failed", {
        evaluator: _PurposeEvaluator.metadata.id,
        operation: "evaluate",
        grade,
        error: error instanceof Error ? error : void 0,
        processingTimeMs: latencyMs
      });
      // Report token usage only for stages that completed before the failure.
      const tokenUsage = stageDetails.length > 0 ? {
        input_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.input_tokens ?? 0), 0),
        output_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.output_tokens ?? 0), 0)
      } : void 0;
      this.sendTelemetry({
        status: "error",
        latencyMs,
        textLength: text.length,
        grade: String(grade),
        provider: this.provider.label,
        tokenUsage,
        errorCode: error instanceof Error ? error.name : "UnknownError",
        metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
        inputText: text
      }).catch(() => void 0);
      // Validation errors reach the caller unchanged; everything else is wrapped.
      if (error instanceof ValidationError) throw error;
      throw wrapProviderError(error, "Purpose evaluation failed");
    }
  }
  /**
   * Convert a grade to an integer and check it against GRADE_MIN..GRADE_MAX.
   *
   * Strings are trimmed and converted with Number(); numbers are used as-is.
   * Any other type (null, undefined, objects, ...) is rejected with a
   * ValidationError instead of failing on `.trim()` with a TypeError, which the
   * caller would otherwise wrap and report as a provider failure.
   *
   * @param grade - Grade as a string (e.g. "5") or a number
   * @returns The grade as an integer
   * @throws {ValidationError} If the grade is not an integer within the supported range
   */
  parseAndValidateGrade(grade) {
    let num = NaN;
    if (typeof grade === "string") {
      num = Number(grade.trim());
    } else if (typeof grade === "number") {
      num = grade;
    }
    if (!Number.isInteger(num) || num < GRADE_MIN || num > GRADE_MAX) {
      throw new ValidationError(
        `Invalid grade "${String(grade)}". Purpose evaluator supports integer grades ${GRADE_MIN}\u2013${GRADE_MAX}.`
      );
    }
    return num;
  }
  /**
   * Send the rendered system and user prompts to the provider and request
   * output matching PurposeOutputSchema.
   *
   * @param inputs - Placeholder values for the prompt templates
   * @returns The provider response's data, usage and latencyMs
   */
  async callLLM(inputs) {
    const response = await this.provider.generateStructured({
      messages: [
        { role: "system", content: getSystemPrompt5(inputs) },
        { role: "user", content: getUserPrompt5(inputs) }
      ],
      schema: PurposeOutputSchema,
      temperature: _PurposeEvaluator.TEMPERATURE
    });
    return { data: response.data, usage: response.usage, latencyMs: response.latencyMs };
  }
};
2711
/**
 * Convenience wrapper: build a PurposeEvaluator from `config` and run one evaluation.
 *
 * @param text - The text to evaluate
 * @param grade - The target grade level
 * @param config - Configuration passed to the PurposeEvaluator constructor
 * @returns The result of PurposeEvaluator#evaluate
 */
async function evaluatePurpose(text, grade, config) {
  const evaluator = new PurposeEvaluator(config);
  return evaluator.evaluate(text, grade);
}
2714
+
2715
+ export { APIError, AuthenticationError, ComplexityClassificationSchema, ConfigurationError, ConventionalityEvaluator, EvaluatorError, GradeBand, GradeLevelAppropriatenessEvaluator, GradeLevelAppropriatenessSchema, LogLevel, NetworkError, Provider, Providers, PurposeEvaluator, RateLimitError, SentenceAnalysisSchema, SentenceStructureEvaluator, SmkEvaluator, TextComplexityEvaluator, TextComplexityLevel, TimeoutError, ValidationError, VocabularyEvaluator, addEngineeredFeatures, calculateFleschKincaidGrade, calculateReadabilityMetrics, evaluateConventionality, evaluateGradeLevelAppropriateness, evaluatePurpose, evaluateSentenceStructure, evaluateSmk, evaluateTextComplexity, evaluateVocabulary, featuresToJSON };
2324
2716
  //# sourceMappingURL=index.js.map
2325
2717
  //# sourceMappingURL=index.js.map