@learning-commons/evaluators 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1,20 +1,22 @@
1
1
  'use strict';
2
2
 
3
3
  var zod = require('zod');
4
+ var ai = require('ai');
4
5
  var crypto = require('crypto');
5
6
  var fs = require('fs');
6
7
  var path = require('path');
7
8
  var os = require('os');
8
9
  var url = require('url');
9
- var ai = require('ai');
10
10
  var nlp = require('compromise');
11
11
  var syllable = require('syllable');
12
+ var textReadability = require('text-readability');
12
13
  var pLimit = require('p-limit');
13
14
 
14
15
  var _documentCurrentScript = typeof document !== 'undefined' ? document.currentScript : null;
15
16
  function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
16
17
 
17
18
  var nlp__default = /*#__PURE__*/_interopDefault(nlp);
19
+ var textReadability__default = /*#__PURE__*/_interopDefault(textReadability);
18
20
  var pLimit__default = /*#__PURE__*/_interopDefault(pLimit);
19
21
 
20
22
  // src/schemas/outputs.ts
@@ -33,6 +35,7 @@ var GradeLevelAppropriatenessSchema = zod.z.object({
33
35
  alternative_grade: GradeBand.describe("An alternative grade level for the text"),
34
36
  scaffolding_needed: zod.z.string().describe("Scaffolding needed for the text to be appropriate for the alternative grade")
35
37
  });
38
+ var PurposeOutputSchema = zod.z.object({ "complexity_score": zod.z.enum(["slightly_complex", "moderately_complex", "very_complex", "exceedingly_complex", "more_context_needed"]).describe("The Purpose complexity level for the target grade."), "reasoning": zod.z.string().describe("A high-level summary of why the text is at this complexity level for the target grade."), "details": zod.z.object({ "detailed_summary": zod.z.array(zod.z.object({ "factor": zod.z.string().describe("The specific text complexity factor identified."), "description": zod.z.string().describe("How this factor manifests in the text."), "effect_on_complexity_dimension": zod.z.string().describe("How this factor affects the reader's ability to understand the text's specific complexity dimension.") }).strict()).describe("Individual complexity factors with descriptions and their effects."), "adjustment_and_scaffolding": zod.z.array(zod.z.object({ "scaffolding_need": zod.z.string().describe("The complexity factor that requires scaffolding."), "suggestion": zod.z.string().describe("A specific instructional strategy to support students with this factor.") }).strict()).describe("Scaffolding strategies to make the text accessible at the target grade."), "recommended_use_cases": zod.z.array(zod.z.object({ "opportunity": zod.z.string().describe("An instructional opportunity related to the text."), "suggestion": zod.z.string().describe("A specific way to leverage this text for that instructional purpose.") }).strict()).describe("Additional instructional opportunities for using this text.") }).strict().describe("Practical instructional details including scaffolding strategies and recommended use cases.") }).strict();
36
39
 
37
40
  // src/errors.ts
38
41
  var EvaluatorError = class extends Error {
@@ -93,8 +96,9 @@ var TimeoutError = class extends APIError {
93
96
  function parseProviderError(error) {
94
97
  if (error instanceof Error) {
95
98
  const message = error.message;
99
+ const err = error;
96
100
  const statusMatch = message.match(/\b(4\d{2}|5\d{2})\b/);
97
- const statusCode = statusMatch ? parseInt(statusMatch[1]) : void 0;
101
+ const statusCode = err.statusCode ?? err.status ?? (statusMatch ? parseInt(statusMatch[1]) : void 0);
98
102
  return {
99
103
  message,
100
104
  statusCode,
@@ -107,6 +111,11 @@ function parseProviderError(error) {
107
111
  }
108
112
  function wrapProviderError(error, defaultMessage = "API request failed") {
109
113
  const { message, statusCode, code } = parseProviderError(error);
114
+ if (statusCode === 404 || statusCode === 400 && /\bmodel\b.*(not found|does not exist|invalid)/i.test(message)) {
115
+ return new ConfigurationError(
116
+ `Model not found or invalid: ${message}. Check the model ID passed to the provider.`
117
+ );
118
+ }
110
119
  if (statusCode === 401 || statusCode === 403) {
111
120
  return new AuthenticationError(
112
121
  message.includes("API key") ? message : "Invalid API key",
@@ -189,6 +198,119 @@ function createLogger(customLogger, level = 2 /* WARN */) {
189
198
  }
190
199
  return new ConsoleLogger(level);
191
200
  }
201
+
202
+ // src/providers/base.ts
203
+ var Providers = {
204
+ google: "google",
205
+ openai: "openai",
206
+ anthropic: "anthropic",
207
+ custom: "custom"
208
+ };
209
+ var VercelAIProvider = class {
210
+ constructor(config) {
211
+ this.config = config;
212
+ if (config.type === "custom") {
213
+ throw new Error(
214
+ "VercelAIProvider does not support custom type. Use config.customProvider directly."
215
+ );
216
+ }
217
+ if (!config.model || config.model.trim() === "") {
218
+ throw new Error(
219
+ `model is required for VercelAIProvider (type: "${config.type}"). No default is assumed.`
220
+ );
221
+ }
222
+ this.model = config.model;
223
+ this.label = `${config.type}:${config.model}`;
224
+ }
225
+ label;
226
+ model;
227
+ /**
228
+ * Generate structured output using Vercel AI SDK's generateText with output
229
+ */
230
+ async generateStructured(request) {
231
+ const model = await this.getModel();
232
+ const startTime = Date.now();
233
+ const { output, usage } = await ai.generateText({
234
+ model,
235
+ messages: request.messages,
236
+ output: ai.Output.object({ schema: request.schema }),
237
+ temperature: request.temperature ?? 0,
238
+ maxRetries: this.config.maxRetries ?? 0,
239
+ ...request.maxTokens !== void 0 ? { maxTokens: request.maxTokens } : {}
240
+ });
241
+ return {
242
+ data: output,
243
+ model: this.model,
244
+ usage: {
245
+ inputTokens: usage.inputTokens || 0,
246
+ outputTokens: usage.outputTokens || 0
247
+ },
248
+ latencyMs: Date.now() - startTime
249
+ };
250
+ }
251
+ /**
252
+ * Generate plain text using Vercel AI SDK's generateText
253
+ */
254
+ async generateText(messages, temperature) {
255
+ const model = await this.getModel();
256
+ const startTime = Date.now();
257
+ const { text, usage } = await ai.generateText({
258
+ model,
259
+ messages,
260
+ temperature: temperature ?? this.config.temperature ?? 0,
261
+ maxRetries: this.config.maxRetries ?? 0
262
+ });
263
+ return {
264
+ text,
265
+ usage: {
266
+ inputTokens: usage.inputTokens || 0,
267
+ outputTokens: usage.outputTokens || 0
268
+ },
269
+ latencyMs: Date.now() - startTime
270
+ };
271
+ }
272
+ /**
273
+ * Get the configured language model.
274
+ * Uses dynamic imports so consumers only need to install the provider packages they use.
275
+ */
276
+ async getModel() {
277
+ const apiKey = this.config.apiKey;
278
+ switch (this.config.type) {
279
+ case "openai": {
280
+ const { createOpenAI } = await import('@ai-sdk/openai').catch(() => {
281
+ throw new Error(
282
+ "To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai"
283
+ );
284
+ });
285
+ return createOpenAI(apiKey ? { apiKey } : {})(this.model);
286
+ }
287
+ case "anthropic": {
288
+ const { createAnthropic } = await import('@ai-sdk/anthropic').catch(() => {
289
+ throw new Error(
290
+ "To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic"
291
+ );
292
+ });
293
+ return createAnthropic(apiKey ? { apiKey } : {})(this.model);
294
+ }
295
+ case "google": {
296
+ const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch(() => {
297
+ throw new Error(
298
+ "To use the Google provider, install its adapter: npm install @ai-sdk/google"
299
+ );
300
+ });
301
+ return createGoogleGenerativeAI(apiKey ? { apiKey } : {})(this.model);
302
+ }
303
+ default:
304
+ throw new Error(`Unsupported provider type: ${this.config.type}`);
305
+ }
306
+ }
307
+ };
308
+ function createProvider(config) {
309
+ if (config.type === "custom" && config.customProvider) {
310
+ return config.customProvider;
311
+ }
312
+ return new VercelAIProvider(config);
313
+ }
192
314
  var SentenceAnalysisSchema = zod.z.object({
193
315
  reasoning: zod.z.string().describe("Step-by-step reasoning for the analysis"),
194
316
  // Foundational
@@ -348,6 +470,12 @@ var VALIDATION_LIMITS = {
348
470
  /** Maximum text length in characters (100K chars ≈ 25K tokens) */
349
471
  MAX_TEXT_LENGTH: 1e5
350
472
  };
473
+ var Provider = /* @__PURE__ */ ((Provider2) => {
474
+ Provider2["OpenAI"] = "openai";
475
+ Provider2["Google"] = "google";
476
+ Provider2["Anthropic"] = "anthropic";
477
+ return Provider2;
478
+ })(Provider || {});
351
479
  var BaseEvaluator = class {
352
480
  telemetryClient;
353
481
  logger;
@@ -365,21 +493,35 @@ var BaseEvaluator = class {
365
493
  * name: 'My Evaluator',
366
494
  * description: 'Does something useful',
367
495
  * supportedGrades: ['3', '4', '5'],
368
- * requiresGoogleKey: true,
369
- * requiresOpenAIKey: false,
496
+ * defaultProviders: [Provider.Google],
370
497
  * };
371
498
  * }
372
499
  * ```
373
500
  */
374
501
  static metadata;
502
+ /**
503
+ * @throws {ConfigurationError} If the subclass has not defined static metadata
504
+ * @throws {ConfigurationError} If modelOverride has an invalid provider or empty model
505
+ * @throws {ConfigurationError} If a required API key is missing
506
+ */
375
507
  constructor(config) {
376
508
  this.logger = createLogger(config.logger, config.logLevel ?? 2 /* WARN */);
509
+ this.validateModelOverride(config);
377
510
  this.validateApiKeys(config);
378
511
  const telemetryConfig = this.normalizeTelemetryConfig(config.telemetry);
379
512
  this.config = {
380
513
  maxRetries: config.maxRetries ?? 2,
381
- telemetry: telemetryConfig
514
+ telemetry: telemetryConfig,
515
+ modelOverride: config.modelOverride,
516
+ googleApiKey: config.googleApiKey,
517
+ openaiApiKey: config.openaiApiKey,
518
+ anthropicApiKey: config.anthropicApiKey
382
519
  };
520
+ if (config.modelOverride) {
521
+ this.logger.warn(
522
+ `modelOverride is active: using ${config.modelOverride.provider}:${config.modelOverride.model} instead of the default model. Evaluation quality may differ from recommended defaults.`
523
+ );
524
+ }
383
525
  if (this.config.telemetry.enabled) {
384
526
  this.telemetryClient = new TelemetryClient({
385
527
  endpoint: "https://api.learningcommons.org/evaluators-telemetry/v1/events",
@@ -404,21 +546,62 @@ var BaseEvaluator = class {
404
546
  return meta;
405
547
  }
406
548
  /**
407
- * Validate that required API keys are provided based on metadata
408
- * @throws {ConfigurationError} If required API keys are missing
549
+ * Validate modelOverride shape: provider must be a known Provider value and
550
+ * model must be a non-empty string.
551
+ * @throws {ConfigurationError} If the override is malformed
409
552
  */
410
- validateApiKeys(config) {
411
- if (this.metadata.requiresGoogleKey && !config.googleApiKey) {
553
+ validateModelOverride(config) {
554
+ if (!config.modelOverride) return;
555
+ const validProviders = Object.values(Provider);
556
+ if (!validProviders.includes(config.modelOverride.provider)) {
412
557
  throw new ConfigurationError(
413
- `Google API key is required for ${this.metadata.name} evaluator. Pass googleApiKey in config.`
558
+ `Invalid provider "${config.modelOverride.provider}" in modelOverride. Valid providers are: ${validProviders.join(", ")}.`
414
559
  );
415
560
  }
416
- if (this.metadata.requiresOpenAIKey && !config.openaiApiKey) {
561
+ if (!config.modelOverride.model || config.modelOverride.model.trim() === "") {
417
562
  throw new ConfigurationError(
418
- `OpenAI API key is required for ${this.metadata.name} evaluator. Pass openaiApiKey in config.`
563
+ `modelOverride.model is required. Specify the model ID for provider "${config.modelOverride.provider}".`
419
564
  );
420
565
  }
421
566
  }
567
+ /**
568
+ * Validate that the required API key is present.
569
+ * When modelOverride is set, checks the override provider's key.
570
+ * Otherwise checks the keys required by the evaluator's default providers.
571
+ * @throws {ConfigurationError} If a required key is missing
572
+ */
573
+ validateApiKeys(config) {
574
+ const keyFor = {
575
+ ["openai" /* OpenAI */]: config.openaiApiKey?.trim() || void 0,
576
+ ["google" /* Google */]: config.googleApiKey?.trim() || void 0,
577
+ ["anthropic" /* Anthropic */]: config.anthropicApiKey?.trim() || void 0
578
+ };
579
+ const humanName = {
580
+ ["openai" /* OpenAI */]: "OpenAI API key",
581
+ ["google" /* Google */]: "Google API key",
582
+ ["anthropic" /* Anthropic */]: "Anthropic API key"
583
+ };
584
+ const configKey = {
585
+ ["openai" /* OpenAI */]: "openaiApiKey",
586
+ ["google" /* Google */]: "googleApiKey",
587
+ ["anthropic" /* Anthropic */]: "anthropicApiKey"
588
+ };
589
+ if (config.modelOverride) {
590
+ if (!keyFor[config.modelOverride.provider]) {
591
+ throw new ConfigurationError(
592
+ `${humanName[config.modelOverride.provider]} is required when using modelOverride with provider "${config.modelOverride.provider}". Pass ${configKey[config.modelOverride.provider]} in config.`
593
+ );
594
+ }
595
+ return;
596
+ }
597
+ for (const provider of this.metadata.defaultProviders) {
598
+ if (!keyFor[provider]) {
599
+ throw new ConfigurationError(
600
+ `${humanName[provider]} is required for ${this.metadata.name} evaluator. Pass ${configKey[provider]} in config.`
601
+ );
602
+ }
603
+ }
604
+ }
422
605
  /**
423
606
  * Normalize telemetry config to standard format
424
607
  */
@@ -492,13 +675,40 @@ var BaseEvaluator = class {
492
675
  const validList = Array.from(validGrades).sort((a, b) => {
493
676
  if (a === "K") return -1;
494
677
  if (b === "K") return 1;
495
- return parseInt(a) - parseInt(b);
678
+ return parseInt(a, 10) - parseInt(b, 10);
496
679
  }).join(", ");
497
680
  throw new ValidationError(
498
681
  `Invalid grade "${grade}". Supported grades for this evaluator: ${validList}`
499
682
  );
500
683
  }
501
684
  }
685
+ /**
686
+ * Create an LLM provider, honouring modelOverride if set.
687
+ * When override is active, the key for the override provider is resolved
688
+ * from the matching top-level config field (e.g. anthropicApiKey for Anthropic).
689
+ */
690
+ createConfiguredProvider(defaultType, defaultModel, defaultApiKey) {
691
+ const override = this.config.modelOverride;
692
+ if (override) {
693
+ const apiKeyFor = {
694
+ ["openai" /* OpenAI */]: this.config.openaiApiKey,
695
+ ["google" /* Google */]: this.config.googleApiKey,
696
+ ["anthropic" /* Anthropic */]: this.config.anthropicApiKey
697
+ };
698
+ return createProvider({
699
+ type: override.provider,
700
+ model: override.model,
701
+ apiKey: apiKeyFor[override.provider],
702
+ maxRetries: this.config.maxRetries
703
+ });
704
+ }
705
+ return createProvider({
706
+ type: defaultType,
707
+ model: defaultModel,
708
+ apiKey: defaultApiKey,
709
+ maxRetries: this.config.maxRetries
710
+ });
711
+ }
502
712
  /**
503
713
  * Send telemetry event to analytics service
504
714
  * Common helper for all evaluators
@@ -519,123 +729,12 @@ var BaseEvaluator = class {
519
729
  provider: params.provider,
520
730
  token_usage: params.tokenUsage,
521
731
  metadata: params.metadata,
732
+ model_override: this.config.modelOverride ? true : void 0,
522
733
  // Include input text only if recording is enabled
523
734
  input_text: this.config.telemetry.recordInputs ? params.inputText : void 0
524
735
  });
525
736
  }
526
737
  };
527
- var DEFAULT_MODELS = {
528
- openai: "gpt-4o",
529
- anthropic: "claude-sonnet-4-5-20250929",
530
- google: "gemini-2.5-pro"
531
- };
532
- var VercelAIProvider = class {
533
- constructor(config) {
534
- this.config = config;
535
- if (config.type === "custom") {
536
- throw new Error(
537
- "VercelAIProvider does not support custom type. Use config.customProvider directly."
538
- );
539
- }
540
- }
541
- /**
542
- * Generate structured output using Vercel AI SDK's generateText with output
543
- */
544
- async generateStructured(request) {
545
- const model = await this.getModel(request.model);
546
- const startTime = Date.now();
547
- const { output, usage } = await ai.generateText({
548
- model,
549
- messages: request.messages,
550
- output: ai.Output.object({ schema: request.schema }),
551
- temperature: request.temperature ?? 0,
552
- maxRetries: this.config.maxRetries ?? 0,
553
- ...request.maxTokens !== void 0 ? { maxTokens: request.maxTokens } : {}
554
- });
555
- return {
556
- data: output,
557
- model: request.model || this.getDefaultModel(),
558
- usage: {
559
- inputTokens: usage.inputTokens || 0,
560
- outputTokens: usage.outputTokens || 0
561
- },
562
- latencyMs: Date.now() - startTime
563
- };
564
- }
565
- /**
566
- * Generate plain text using Vercel AI SDK's generateText
567
- */
568
- async generateText(messages, temperature) {
569
- const model = await this.getModel();
570
- const startTime = Date.now();
571
- const { text, usage } = await ai.generateText({
572
- model,
573
- messages,
574
- temperature: temperature ?? this.config.temperature ?? 0,
575
- maxRetries: this.config.maxRetries ?? 0
576
- });
577
- return {
578
- text,
579
- usage: {
580
- inputTokens: usage.inputTokens || 0,
581
- outputTokens: usage.outputTokens || 0
582
- },
583
- latencyMs: Date.now() - startTime
584
- };
585
- }
586
- /**
587
- * Get the configured language model.
588
- * Uses dynamic imports so consumers only need to install the provider packages they use.
589
- */
590
- async getModel(requestModel) {
591
- const modelId = requestModel || this.config.model || this.getDefaultModel();
592
- const apiKey = this.config.apiKey;
593
- switch (this.config.type) {
594
- case "openai": {
595
- const { createOpenAI } = await import('@ai-sdk/openai').catch(() => {
596
- throw new Error(
597
- "To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai"
598
- );
599
- });
600
- return createOpenAI(apiKey ? { apiKey } : {})(modelId);
601
- }
602
- case "anthropic": {
603
- const { createAnthropic } = await import('@ai-sdk/anthropic').catch(() => {
604
- throw new Error(
605
- "To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic"
606
- );
607
- });
608
- return createAnthropic(apiKey ? { apiKey } : {})(modelId);
609
- }
610
- case "google": {
611
- const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch(() => {
612
- throw new Error(
613
- "To use the Google provider, install its adapter: npm install @ai-sdk/google"
614
- );
615
- });
616
- return createGoogleGenerativeAI(apiKey ? { apiKey } : {})(modelId);
617
- }
618
- default:
619
- throw new Error(`Unsupported provider type: ${this.config.type}`);
620
- }
621
- }
622
- /**
623
- * Get default model for the configured provider
624
- */
625
- getDefaultModel() {
626
- const providerType = this.config.type;
627
- if (providerType === "custom") {
628
- throw new Error("Cannot get default model for custom provider type");
629
- }
630
- return DEFAULT_MODELS[providerType];
631
- }
632
- };
633
- function createProvider(config) {
634
- if (config.type === "custom" && config.customProvider) {
635
- return config.customProvider;
636
- }
637
- return new VercelAIProvider(config);
638
- }
639
738
  var VocabularyComplexitySchema = zod.z.object({
640
739
  tier_2_words: zod.z.string().describe("List of Tier 2 words (academic words)"),
641
740
  tier_3_words: zod.z.string().describe("List of Tier 3 words (domain-specific)"),
@@ -829,6 +928,44 @@ function featuresToJSON(features, decimals = 1, castToInt = true) {
829
928
  }
830
929
  return JSON.stringify(payload, null, 2);
831
930
  }
931
+ var LIBRARY_ADAPTERS = {
932
+ "text-readability": {
933
+ call(fnName, text) {
934
+ const fn = textReadability__default.default[fnName];
935
+ if (typeof fn !== "function") {
936
+ throw new Error(`Function "${fnName}" not found in text-readability.`);
937
+ }
938
+ return fn.call(textReadability__default.default, text);
939
+ }
940
+ }
941
+ };
942
+ var POST_TRANSFORMS = {
943
+ round(value, { precision = 0 }) {
944
+ const factor = 10 ** precision;
945
+ return Math.round(value * factor) / factor;
946
+ }
947
+ };
948
+ function runPreprocessingStep(text, impl) {
949
+ const adapter = LIBRARY_ADAPTERS[impl.library];
950
+ if (!adapter) {
951
+ const supported = Object.keys(LIBRARY_ADAPTERS).join(", ");
952
+ throw new Error(
953
+ `Unsupported preprocessing library "${impl.library}". Supported: ${supported}.`
954
+ );
955
+ }
956
+ let result = adapter.call(impl.function, text);
957
+ if (impl.post_transform) {
958
+ const transform = POST_TRANSFORMS[impl.post_transform.type];
959
+ if (!transform) {
960
+ const supported = Object.keys(POST_TRANSFORMS).join(", ");
961
+ throw new Error(
962
+ `Unsupported post_transform type "${impl.post_transform.type}". Supported: ${supported}.`
963
+ );
964
+ }
965
+ result = transform(result, impl.post_transform);
966
+ }
967
+ return result;
968
+ }
832
969
 
833
970
  // ../../evals/prompts/vocabulary/background-knowledge.txt
834
971
  var background_knowledge_default = `
@@ -1134,32 +1271,28 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1134
1271
  name: "Vocabulary",
1135
1272
  description: "Evaluates vocabulary complexity of educational texts relative to grade level",
1136
1273
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
1137
- requiresGoogleKey: true,
1138
- requiresOpenAIKey: true
1274
+ defaultProviders: ["google" /* Google */, "openai" /* OpenAI */]
1139
1275
  };
1140
1276
  grades34ComplexityProvider;
1141
1277
  otherGradesComplexityProvider;
1142
1278
  backgroundKnowledgeProvider;
1143
1279
  constructor(config) {
1144
1280
  super(config);
1145
- this.grades34ComplexityProvider = createProvider({
1146
- type: "google",
1147
- model: "gemini-2.5-pro",
1148
- apiKey: config.googleApiKey,
1149
- maxRetries: this.config.maxRetries
1150
- });
1151
- this.otherGradesComplexityProvider = createProvider({
1152
- type: "openai",
1153
- model: "gpt-4.1-2025-04-14",
1154
- apiKey: config.openaiApiKey,
1155
- maxRetries: this.config.maxRetries
1156
- });
1157
- this.backgroundKnowledgeProvider = createProvider({
1158
- type: "openai",
1159
- model: "gpt-4o-2024-11-20",
1160
- apiKey: config.openaiApiKey,
1161
- maxRetries: this.config.maxRetries
1162
- });
1281
+ this.grades34ComplexityProvider = this.createConfiguredProvider(
1282
+ "google" /* Google */,
1283
+ "gemini-2.5-pro",
1284
+ config.googleApiKey
1285
+ );
1286
+ this.otherGradesComplexityProvider = this.createConfiguredProvider(
1287
+ "openai" /* OpenAI */,
1288
+ "gpt-4.1-2025-04-14",
1289
+ config.openaiApiKey
1290
+ );
1291
+ this.backgroundKnowledgeProvider = this.createConfiguredProvider(
1292
+ "openai" /* OpenAI */,
1293
+ "gpt-4o-2024-11-20",
1294
+ config.openaiApiKey
1295
+ );
1163
1296
  }
1164
1297
  /**
1165
1298
  * Evaluate vocabulary complexity for a given text and grade level
@@ -1168,6 +1301,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1168
1301
  * @param grade - The target grade level (3-12)
1169
1302
  * @returns Evaluation result with complexity score and detailed analysis
1170
1303
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
1304
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1171
1305
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1172
1306
  */
1173
1307
  async evaluate(text, grade) {
@@ -1179,7 +1313,9 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1179
1313
  });
1180
1314
  const startTime = Date.now();
1181
1315
  const stageDetails = [];
1182
- const complexityProviderName = grade === "3" || grade === "4" ? "google:gemini-2.5-pro" : "openai:gpt-4.1-2025-04-14";
1316
+ const complexityProviderLabel = grade === "3" || grade === "4" ? this.grades34ComplexityProvider.label : this.otherGradesComplexityProvider.label;
1317
+ const backgroundProviderLabel = this.backgroundKnowledgeProvider.label;
1318
+ const modelLabel = this.config.modelOverride ? backgroundProviderLabel : `${backgroundProviderLabel}+${complexityProviderLabel}`;
1183
1319
  try {
1184
1320
  this.validateText(text);
1185
1321
  this.validateGrade(grade, new Set(_VocabularyEvaluator.metadata.supportedGrades));
@@ -1190,7 +1326,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1190
1326
  const bgResponse = await this.getBackgroundKnowledgeAssumption(text, grade);
1191
1327
  stageDetails.push({
1192
1328
  stage: "background_knowledge",
1193
- provider: "openai:gpt-4o-2024-11-20",
1329
+ provider: backgroundProviderLabel,
1194
1330
  latency_ms: bgResponse.latencyMs,
1195
1331
  token_usage: {
1196
1332
  input_tokens: bgResponse.usage.inputTokens,
@@ -1206,7 +1342,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1206
1342
  );
1207
1343
  stageDetails.push({
1208
1344
  stage: "complexity_evaluation",
1209
- provider: complexityProviderName,
1345
+ provider: complexityProviderLabel,
1210
1346
  latency_ms: complexityResponse.latencyMs,
1211
1347
  token_usage: {
1212
1348
  input_tokens: complexityResponse.usage.inputTokens,
@@ -1222,7 +1358,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1222
1358
  score: complexityResponse.data.complexity_score,
1223
1359
  reasoning: complexityResponse.data.reasoning,
1224
1360
  metadata: {
1225
- model: `openai:gpt-4o-2024-11-20 + ${complexityProviderName}`,
1361
+ model: modelLabel,
1226
1362
  processingTimeMs: latencyMs
1227
1363
  },
1228
1364
  _internal: complexityResponse.data
@@ -1232,7 +1368,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1232
1368
  latencyMs,
1233
1369
  textLength: text.length,
1234
1370
  grade,
1235
- provider: `openai:gpt-4o-2024-11-20 + ${complexityProviderName}`,
1371
+ provider: modelLabel,
1236
1372
  tokenUsage: totalTokenUsage,
1237
1373
  metadata: {
1238
1374
  stage_details: stageDetails
@@ -1267,7 +1403,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
1267
1403
  latencyMs,
1268
1404
  textLength: text.length,
1269
1405
  grade,
1270
- provider: `openai:gpt-4o-2024-11-20 + ${complexityProviderName}`,
1406
+ provider: modelLabel,
1271
1407
  tokenUsage: totalTokenUsage,
1272
1408
  errorCode: error instanceof Error ? error.name : "UnknownError",
1273
1409
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -1438,25 +1574,12 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1438
1574
  name: "Sentence Structure",
1439
1575
  description: "Evaluates sentence structure complexity based on grammatical features",
1440
1576
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
1441
- requiresGoogleKey: false,
1442
- requiresOpenAIKey: true
1577
+ defaultProviders: ["openai" /* OpenAI */]
1443
1578
  };
1444
- analysisProvider;
1445
- complexityProvider;
1579
+ provider;
1446
1580
  constructor(config) {
1447
1581
  super(config);
1448
- this.analysisProvider = createProvider({
1449
- type: "openai",
1450
- model: "gpt-4o",
1451
- apiKey: config.openaiApiKey,
1452
- maxRetries: this.config.maxRetries
1453
- });
1454
- this.complexityProvider = createProvider({
1455
- type: "openai",
1456
- model: "gpt-4o",
1457
- apiKey: config.openaiApiKey,
1458
- maxRetries: this.config.maxRetries
1459
- });
1582
+ this.provider = this.createConfiguredProvider("openai" /* OpenAI */, "gpt-4o", config.openaiApiKey);
1460
1583
  }
1461
1584
  /**
1462
1585
  * Evaluate sentence structure complexity for a given text and grade level
@@ -1465,6 +1588,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1465
1588
  * @param grade - The target grade level (3-12)
1466
1589
  * @returns Evaluation result with complexity score and detailed analysis
1467
1590
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
1591
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1468
1592
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1469
1593
  */
1470
1594
  async evaluate(text, grade) {
@@ -1486,7 +1610,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1486
1610
  const analysisResponse = await this.analyzeSentenceStructure(text);
1487
1611
  stageDetails.push({
1488
1612
  stage: "sentence_analysis",
1489
- provider: "openai:gpt-4o",
1613
+ provider: this.provider.label,
1490
1614
  latency_ms: analysisResponse.latencyMs,
1491
1615
  token_usage: {
1492
1616
  input_tokens: analysisResponse.usage.inputTokens,
@@ -1501,7 +1625,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1501
1625
  const complexityResponse = await this.classifyComplexity(features, grade, text);
1502
1626
  stageDetails.push({
1503
1627
  stage: "complexity_classification",
1504
- provider: "openai:gpt-4o",
1628
+ provider: this.provider.label,
1505
1629
  latency_ms: complexityResponse.latencyMs,
1506
1630
  token_usage: {
1507
1631
  input_tokens: complexityResponse.usage.inputTokens,
@@ -1517,7 +1641,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1517
1641
  score: complexityResponse.data.answer,
1518
1642
  reasoning: complexityResponse.data.reasoning,
1519
1643
  metadata: {
1520
- model: "openai:gpt-4o",
1644
+ model: this.provider.label,
1521
1645
  processingTimeMs: latencyMs
1522
1646
  },
1523
1647
  _internal: {
@@ -1531,7 +1655,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1531
1655
  latencyMs,
1532
1656
  textLength: text.length,
1533
1657
  grade,
1534
- provider: "openai:gpt-4o",
1658
+ provider: this.provider.label,
1535
1659
  tokenUsage: totalTokenUsage,
1536
1660
  metadata: {
1537
1661
  stage_details: stageDetails
@@ -1566,7 +1690,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1566
1690
  latencyMs,
1567
1691
  textLength: text.length,
1568
1692
  grade,
1569
- provider: "openai:gpt-4o",
1693
+ provider: this.provider.label,
1570
1694
  tokenUsage: totalTokenUsage,
1571
1695
  errorCode: error instanceof Error ? error.name : "UnknownError",
1572
1696
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -1594,7 +1718,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1594
1718
  `flesch_kincaid_grade: ${metrics.fleschKincaidGrade}`
1595
1719
  ].join("\n");
1596
1720
  const userPrompt = getUserPromptAnalysis(text, gtCountsStr);
1597
- const response = await this.analysisProvider.generateStructured({
1721
+ const response = await this.provider.generateStructured({
1598
1722
  messages: [
1599
1723
  { role: "system", content: getSystemPromptAnalysis() },
1600
1724
  { role: "user", content: userPrompt }
@@ -1616,7 +1740,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
1616
1740
  async classifyComplexity(features, grade, excerpt) {
1617
1741
  const featuresJSON = featuresToJSON(features, 1, true);
1618
1742
  const userPrompt = getUserPromptComplexity(featuresJSON, grade, excerpt);
1619
- const response = await this.complexityProvider.generateStructured({
1743
+ const response = await this.provider.generateStructured({
1620
1744
  messages: [
1621
1745
  { role: "system", content: getSystemPromptComplexity() },
1622
1746
  { role: "user", content: userPrompt }
@@ -1667,18 +1791,16 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1667
1791
  description: "Determines appropriate grade level for text with scaffolding recommendations",
1668
1792
  supportedGrades: [],
1669
1793
  // No grade parameter required - evaluates what grade the text is appropriate for
1670
- requiresGoogleKey: true,
1671
- requiresOpenAIKey: false
1794
+ defaultProviders: ["google" /* Google */]
1672
1795
  };
1673
1796
  provider;
1674
1797
  constructor(config) {
1675
1798
  super(config);
1676
- this.provider = createProvider({
1677
- type: "google",
1678
- model: "gemini-2.5-pro",
1679
- apiKey: config.googleApiKey,
1680
- maxRetries: this.config.maxRetries
1681
- });
1799
+ this.provider = this.createConfiguredProvider(
1800
+ "google" /* Google */,
1801
+ "gemini-2.5-pro",
1802
+ config.googleApiKey
1803
+ );
1682
1804
  }
1683
1805
  /**
1684
1806
  * Evaluate grade level appropriateness for a given text
@@ -1686,6 +1808,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1686
1808
  * @param text - The text to evaluate
1687
1809
  * @returns Evaluation result with grade recommendations and scaffolding suggestions
1688
1810
  * @throws {ValidationError} If text is empty or too short/long
1811
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1689
1812
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1690
1813
  */
1691
1814
  async evaluate(text) {
@@ -1719,7 +1842,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1719
1842
  score: response.data.grade,
1720
1843
  reasoning: response.data.reasoning,
1721
1844
  metadata: {
1722
- model: "google:gemini-2.5-pro",
1845
+ model: this.provider.label,
1723
1846
  processingTimeMs: latencyMs
1724
1847
  },
1725
1848
  _internal: response.data
@@ -1728,7 +1851,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1728
1851
  status: "success",
1729
1852
  latencyMs,
1730
1853
  textLength: text.length,
1731
- provider: "google:gemini-2.5-pro",
1854
+ provider: this.provider.label,
1732
1855
  tokenUsage,
1733
1856
  // No metadata.stage_details for single-stage evaluator
1734
1857
  inputText: text
@@ -1753,7 +1876,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
1753
1876
  status: "error",
1754
1877
  latencyMs,
1755
1878
  textLength: text.length,
1756
- provider: "google:gemini-2.5-pro",
1879
+ provider: this.provider.label,
1757
1880
  errorCode: error instanceof Error ? error.name : "UnknownError",
1758
1881
  inputText: text
1759
1882
  }).catch(() => {
@@ -1868,18 +1991,16 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1868
1991
  name: "Subject Matter Knowledge",
1869
1992
  description: "Evaluates background knowledge demands of educational texts relative to grade level",
1870
1993
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
1871
- requiresGoogleKey: true,
1872
- requiresOpenAIKey: false
1994
+ defaultProviders: ["google" /* Google */]
1873
1995
  };
1874
1996
  provider;
1875
1997
  constructor(config) {
1876
1998
  super(config);
1877
- this.provider = createProvider({
1878
- type: "google",
1879
- model: "gemini-3-flash-preview",
1880
- apiKey: config.googleApiKey,
1881
- maxRetries: this.config.maxRetries
1882
- });
1999
+ this.provider = this.createConfiguredProvider(
2000
+ "google" /* Google */,
2001
+ "gemini-3-flash-preview",
2002
+ config.googleApiKey
2003
+ );
1883
2004
  }
1884
2005
  /**
1885
2006
  * Evaluate subject matter knowledge complexity for a given text and grade level
@@ -1888,6 +2009,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1888
2009
  * @param grade - The target grade level (3-12)
1889
2010
  * @returns Evaluation result with complexity score and detailed analysis
1890
2011
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
2012
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
1891
2013
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1892
2014
  */
1893
2015
  async evaluate(text, grade) {
@@ -1910,7 +2032,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1910
2032
  const response = await this.evaluateSmk(text, grade, fkScore);
1911
2033
  stageDetails.push({
1912
2034
  stage: "smk_evaluation",
1913
- provider: "google:gemini-3-flash-preview",
2035
+ provider: this.provider.label,
1914
2036
  latency_ms: response.latencyMs,
1915
2037
  token_usage: {
1916
2038
  input_tokens: response.usage.inputTokens,
@@ -1926,7 +2048,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1926
2048
  score: response.data.complexity_score,
1927
2049
  reasoning: response.data.reasoning,
1928
2050
  metadata: {
1929
- model: "google:gemini-3-flash-preview",
2051
+ model: this.provider.label,
1930
2052
  processingTimeMs: latencyMs
1931
2053
  },
1932
2054
  _internal: response.data
@@ -1936,7 +2058,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1936
2058
  latencyMs,
1937
2059
  textLength: text.length,
1938
2060
  grade,
1939
- provider: "google:gemini-3-flash-preview",
2061
+ provider: this.provider.label,
1940
2062
  tokenUsage: totalTokenUsage,
1941
2063
  metadata: {
1942
2064
  stage_details: stageDetails
@@ -1971,7 +2093,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1971
2093
  latencyMs,
1972
2094
  textLength: text.length,
1973
2095
  grade,
1974
- provider: "google:gemini-3-flash-preview",
2096
+ provider: this.provider.label,
1975
2097
  tokenUsage: totalTokenUsage,
1976
2098
  errorCode: error instanceof Error ? error.name : "UnknownError",
1977
2099
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -2079,18 +2201,16 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2079
2201
  name: "Conventionality",
2080
2202
  description: "Evaluates how explicit, literal, and straightforward a text's meaning is relative to grade level",
2081
2203
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
2082
- requiresGoogleKey: true,
2083
- requiresOpenAIKey: false
2204
+ defaultProviders: ["google" /* Google */]
2084
2205
  };
2085
2206
  provider;
2086
2207
  constructor(config) {
2087
2208
  super(config);
2088
- this.provider = createProvider({
2089
- type: "google",
2090
- model: "gemini-3-flash-preview",
2091
- apiKey: config.googleApiKey,
2092
- maxRetries: this.config.maxRetries
2093
- });
2209
+ this.provider = this.createConfiguredProvider(
2210
+ "google" /* Google */,
2211
+ "gemini-3-flash-preview",
2212
+ config.googleApiKey
2213
+ );
2094
2214
  }
2095
2215
  /**
2096
2216
  * Evaluate conventionality complexity for a given text and grade level
@@ -2099,6 +2219,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2099
2219
  * @param grade - The target grade level (3-12)
2100
2220
  * @returns Evaluation result with complexity score and detailed analysis
2101
2221
  * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
2222
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
2102
2223
  * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
2103
2224
  */
2104
2225
  async evaluate(text, grade) {
@@ -2121,7 +2242,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2121
2242
  const response = await this.evaluateConventionality(text, grade, fkScore);
2122
2243
  stageDetails.push({
2123
2244
  stage: "conventionality_evaluation",
2124
- provider: "google:gemini-3-flash-preview",
2245
+ provider: this.provider.label,
2125
2246
  latency_ms: response.latencyMs,
2126
2247
  token_usage: {
2127
2248
  input_tokens: response.usage.inputTokens,
@@ -2137,7 +2258,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2137
2258
  score: response.data.complexity_score,
2138
2259
  reasoning: response.data.reasoning,
2139
2260
  metadata: {
2140
- model: "google:gemini-3-flash-preview",
2261
+ model: this.provider.label,
2141
2262
  processingTimeMs: latencyMs
2142
2263
  },
2143
2264
  _internal: response.data
@@ -2147,7 +2268,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2147
2268
  latencyMs,
2148
2269
  textLength: text.length,
2149
2270
  grade,
2150
- provider: "google:gemini-3-flash-preview",
2271
+ provider: this.provider.label,
2151
2272
  tokenUsage: totalTokenUsage,
2152
2273
  metadata: {
2153
2274
  stage_details: stageDetails
@@ -2182,7 +2303,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
2182
2303
  latencyMs,
2183
2304
  textLength: text.length,
2184
2305
  grade,
2185
- provider: "google:gemini-3-flash-preview",
2306
+ provider: this.provider.label,
2186
2307
  tokenUsage: totalTokenUsage,
2187
2308
  errorCode: error instanceof Error ? error.name : "UnknownError",
2188
2309
  metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
@@ -2224,8 +2345,7 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
2224
2345
  name: "Text Complexity",
2225
2346
  description: "Composite evaluator analyzing vocabulary, sentence structure, subject matter knowledge, and conventionality complexity",
2226
2347
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
2227
- requiresGoogleKey: true,
2228
- requiresOpenAIKey: true
2348
+ defaultProviders: ["google" /* Google */, "openai" /* OpenAI */]
2229
2349
  };
2230
2350
  vocabularyEvaluator;
2231
2351
  sentenceStructureEvaluator;
@@ -2250,7 +2370,8 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
2250
2370
  * @param text - The text to evaluate
2251
2371
  * @param grade - The target grade level (3-12)
2252
2372
  * @returns Map of sub-evaluator results
2253
- * @throws {ValidationError} If text is empty or grade is invalid
2373
+ * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
2374
+ * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
2254
2375
  * @throws {Error} If all sub-evaluators fail
2255
2376
  */
2256
2377
  async evaluate(text, grade) {
@@ -2297,7 +2418,7 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
2297
2418
  latencyMs,
2298
2419
  textLength: text.length,
2299
2420
  grade,
2300
- provider: "composite:google+openai",
2421
+ provider: this.config.modelOverride ? `${this.config.modelOverride.provider}:${this.config.modelOverride.model}` : "composite:google+openai",
2301
2422
  errorCode: hasFailures ? "PartialFailure" : void 0,
2302
2423
  inputText: text
2303
2424
  }).catch(() => {
@@ -2328,6 +2449,278 @@ async function evaluateTextComplexity(text, grade, config) {
2328
2449
  return evaluator.evaluate(text, grade);
2329
2450
  }
2330
2451
 
2452
+ // ../../evals/prompts/purpose/system.txt
2453
+ var system_default4 = '\n Role\n You are an expert reading assessment evaluator. Your task is to determine the Text Complexity of a given passage based exclusively on the Purpose dimension of the qualitative measures rubric.\n\n Task Details\n You will be provided with an informational or literary `text`, along with its `grade_level` and `fk_score` (Flesch-Kincaid). You must analyze the text and determine how difficult it is for a reader to identify the author\'s purpose. \n\n Crucially, you must distinguish between the text\'s *topic* (what it is about) and its *purpose* (why the author wrote it). \n\n Rubric: Purpose Complexity\n Exceedingly Complex: Subtle and intricate, difficult to determine; includes many theoretical or abstract elements.\n Very Complex: Implicit or subtle but fairly easy to infer; more theoretical or abstract than concrete.\n Moderately Complex: Implied but easy to identify based upon context or source.\n Slightly Complex: Explicitly stated, clear, concrete, narrowly focused.\n More Context Needed: The text is a fragment or lacks necessary introductory context, making the true purpose impossible to determine accurately without external background knowledge.\n\n Expert Rules for Evaluating Purpose\n Based on expert consensus and historical grading corrections, you must apply the following heuristics:\n\n 1. The "Slightly Complex" Benchmark (Straightforward and Explicit)\n A text is Slightly Complex if its purpose is explicitly stated or if its informative intent is straightforward, clear, concrete, and directly answers what the text is immediately about. If the text opens by clearly identifying a concrete topic (e.g., "Pins are made of either brass or iron wire") and rigidly follows through by explaining factual, practical information or a process (like manufacturing steps or geographic facts), the purpose is considered explicit and straightforward. It does *not* require a literal statement like "The purpose of this text is to..." 
as long as the delivery of information is direct, clear, and unadorned by persuasive elements or complex framing.\n\n 2. Moderately Complex via Guiding Questions & Inquiry Formats\n If a text begins with a general introduction and uses guiding questions (e.g., "Have you ever wondered how clouds are formed?") to transition into an explanation, the purpose is implied rather than explicitly stated upfront. Because the reader must recognize the question as the pivot point for the author\'s intent, it is Moderately Complex.\n\n 3. Moderately Complex via Multiple Distinct Informational Goals\n If a text covers a broad topic but jumps between several distinct scientific or informational objectives without an overarching framing device or explicit thesis (e.g., talking about measuring ice sheets, then mapping, then finding meteorites), the reader must synthesize these diverse facts to recognize the broader purpose, making it Moderately Complex.\n\n 4. Moderately Complex via Arguments Disguised as Information\n If an author is arguing a specific point, correcting a misconception, or defending a stance, but the text could initially be mistaken by students as purely informative factual text, it is Moderately Complex. The reader must infer the persuasive intent or argumentative purpose beneath the informative tone.\n\n 5. "More Context Needed" for Fragments\n If a text is a fragment missing a crucial introduction or context, and identifying the author\'s purpose beyond a simple surface-level description would be exceptionally difficult for a reader in the target grade level without that external background, score it as `more_context_needed`. 
\n\n Output Format\n Provide your evaluation in the following structure:\n reasoning:\n - Surface Analysis: Identify if the text clearly identifies its topic and delivers straightforward facts, or if it utilizes structural cues, titles, or direct thesis statements.\n - Subtlety & Framing: Is the informative purpose straightforward and concrete? Does it use guiding questions? Is it an argument disguised as pure information? Are there multiple distinct informational goals requiring synthesis?\n - Context Check: Is this text a fragment missing crucial context that obscures the deeper purpose for the target grade level?\n - Rubric Alignment: Explain how the text aligns with the specific language of the rubric, explicitly referencing the expert rules above. Justify why it isn\'t one level simpler or more complex.\n\n answer:\n - complexity_score: (slightly_complex, moderately_complex, very_complex, exceedingly_complex, more_context_needed)\n - reasoning: A brief summary of your final decision.\n - details: Structured breakdown of PurposeDetails including detailed_summary, adjustment_and_scaffolding, and recommended_use_cases.\n';
2454
+
2455
+ // ../../evals/prompts/purpose/user.txt
2456
+ var user_default4 = "Analyze:\nText: {text}\nGrade: {grade_level}\nFK Score: {fk_score}";
2457
+
2458
+ // ../../evals/prompts/purpose/config.json
2459
+ var config_default = {
2460
+ evaluator: {
2461
+ id: "literacy.gla.purpose",
2462
+ name: "Purpose Dimension Text Complexity Evaluator",
2463
+ description: "Evaluates the Purpose dimension of qualitative text complexity for K-12 reading assessment, producing a 5-level rubric rating with structured pedagogical detail."
2464
+ },
2465
+ preprocessing: [
2466
+ {
2467
+ id: "fk_score",
2468
+ kind: "flesch_kincaid_grade",
2469
+ description: "Compute the Flesch-Kincaid Grade Level for the input text and bind it to {fk_score} in the prompt.",
2470
+ input: "text",
2471
+ output: "fk_score",
2472
+ implementation: {
2473
+ python: {
2474
+ library: "textstat",
2475
+ function: "flesch_kincaid_grade",
2476
+ post_transform: {
2477
+ type: "round",
2478
+ precision: 2
2479
+ }
2480
+ },
2481
+ typescript: {
2482
+ library: "text-readability",
2483
+ function: "fleschKincaidGrade",
2484
+ post_transform: {
2485
+ type: "round",
2486
+ precision: 2
2487
+ }
2488
+ }
2489
+ }
2490
+ }
2491
+ ],
2492
+ steps: [
2493
+ {
2494
+ id: "evaluate_purpose",
2495
+ description: "Single-call LLM step that produces the EvaluatorOutput JSON.",
2496
+ prompt: {
2497
+ type: "chat",
2498
+ messages: [
2499
+ {
2500
+ role: "system",
2501
+ source_path: "system.txt",
2502
+ sha256: "745b95b7d54dc845b99363c9d3360355381883c22a5f6a0f305d7349cae38a54"
2503
+ },
2504
+ {
2505
+ role: "user",
2506
+ source_path: "user.txt",
2507
+ sha256: "cd8e6347db1a55d104e34436f8f66e833bd6583645d4786a554aaefdd26479b2"
2508
+ }
2509
+ ],
2510
+ placeholders: {
2511
+ text: {
2512
+ required: true,
2513
+ source: "input"
2514
+ },
2515
+ grade_level: {
2516
+ required: true,
2517
+ source: "input"
2518
+ },
2519
+ fk_score: {
2520
+ required: true,
2521
+ source: "preprocessing.fk_score"
2522
+ }
2523
+ }
2524
+ },
2525
+ model: {
2526
+ provider: "google",
2527
+ name: "gemini-3-flash-preview"
2528
+ },
2529
+ generation: {
2530
+ temperature: 0
2531
+ },
2532
+ parser: {
2533
+ kind: "structured_output"
2534
+ },
2535
+ output_binding: "formatted_output"
2536
+ }
2537
+ ]};
2538
+
2539
+ // src/prompts/purpose/index.ts
2540
// Step id is "evaluate_" followed by the last dot-separated segment of the
// evaluator id declared in the purpose config.
var STEP_ID = "evaluate_" + config_default.evaluator.id.split(".").pop();

// Locate that step's definition; the bundle cannot work without it, so fail at load time.
var _step = config_default.steps.find(function (candidate) {
  return candidate.id === STEP_ID;
});
if (!_step) {
  throw new Error(`Step "${STEP_ID}" not found in purpose config.json`);
}

// Names of the `{placeholder}` tokens the step's prompt declares.
var PLACEHOLDER_KEYS = Object.keys(_step.prompt.placeholders);
2544
/**
 * Fill `{placeholder}` tokens in a prompt template.
 *
 * Only keys listed in PLACEHOLDER_KEYS are recognised, and a token is replaced
 * only when its key is present in `inputs`; every other token is left verbatim.
 *
 * Substitution happens in a single pass with a function replacer, so:
 *  - `$`-sequences in an input value (`$&`, `$$`, `$'`, ...) are inserted
 *    literally instead of being interpreted as replacement patterns, and
 *  - text that was just inserted is never re-scanned, so an input value that
 *    happens to contain e.g. `{grade_level}` is not rewritten.
 *
 * @param {string} template - Prompt text containing `{key}` tokens.
 * @param {Object} inputs - Values keyed by placeholder name.
 * @returns {string} The template with every known, supplied token substituted.
 */
function applyPlaceholders(template, inputs) {
  if (PLACEHOLDER_KEYS.length === 0) return template;
  // Escape regex metacharacters so a key is always matched as literal text.
  const alternation = PLACEHOLDER_KEYS
    .map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
    .join("|");
  const tokenPattern = new RegExp(`\\{(${alternation})\\}`, "g");
  return template.replace(
    tokenPattern,
    (token, key) => key in inputs ? String(inputs[key]) : token
  );
}
2550
/**
 * Build the Purpose system prompt.
 *
 * @param {Object} inputs - Placeholder values keyed by placeholder name.
 * @returns {string} `system_default4` after placeholder substitution.
 */
function getSystemPrompt5(inputs) {
  const systemPrompt = applyPlaceholders(system_default4, inputs);
  return systemPrompt;
}
2553
/**
 * Build the Purpose user prompt.
 *
 * @param {Object} inputs - Placeholder values keyed by placeholder name.
 * @returns {string} `user_default4` after placeholder substitution.
 */
function getUserPrompt5(inputs) {
  const userPrompt = applyPlaceholders(user_default4, inputs);
  return userPrompt;
}
2556
+
2557
+ // ../../evals/prompts/purpose/input_schema.json
2558
+ var input_schema_default = {
2559
+ properties: {
2560
+ grade_level: {
2561
+ minimum: 3,
2562
+ maximum: 12}
2563
+ }
2564
+ };
2565
+
2566
+ // src/evaluators/purpose.ts
2567
// Step id is "evaluate_" followed by the last dot-separated segment of the
// evaluator id declared in the purpose config.
var STEP_ID2 = "evaluate_" + config_default.evaluator.id.split(".").pop();
var _step2 = config_default.steps.find(function (candidate) {
  return candidate.id === STEP_ID2;
});
if (!_step2) {
  throw new Error(`Step "${STEP_ID2}" not found in purpose config.json`);
}
var STEP = _step2;

// Grade bounds come from the input schema.
var { minimum: GRADE_MIN, maximum: GRADE_MAX } = input_schema_default.properties.grade_level;

// Every grade from GRADE_MIN to GRADE_MAX inclusive, as strings.
var SUPPORTED_GRADES = Array.from(
  { length: GRADE_MAX - GRADE_MIN + 1 },
  (_unused, offset) => String(GRADE_MIN + offset)
);

// Display label for each `complexity_score` enum value.
var COMPLEXITY_SCORE_DISPLAY = {
  slightly_complex: "Slightly complex",
  moderately_complex: "Moderately complex",
  very_complex: "Very complex",
  exceedingly_complex: "Exceedingly complex",
  more_context_needed: "More context needed"
};
2581
/**
 * Evaluator for the Purpose dimension of qualitative text complexity.
 *
 * Identity, model name, temperature and the supported grade range are read from
 * the bundled purpose config and input schema. Each evaluation computes a
 * Flesch-Kincaid score through the configured preprocessing step, issues one
 * structured LLM call, and maps the returned `complexity_score` enum value to
 * its display label.
 */
var PurposeEvaluator = class _PurposeEvaluator extends BaseEvaluator {
  static metadata = {
    id: config_default.evaluator.id,
    name: config_default.evaluator.name,
    description: config_default.evaluator.description,
    supportedGrades: SUPPORTED_GRADES,
    defaultProviders: ["google" /* Google */]
  };
  static TEMPERATURE = STEP.generation.temperature;
  /**
   * Compute the `fk_score` prompt input for a text.
   *
   * @param text - The text to score
   * @returns Whatever `runPreprocessingStep` yields for the config's TypeScript `fk_score` implementation
   * @throws {Error} If the config has no `fk_score` preprocessing step
   */
  static computeFkScore(text) {
    const fkStep = config_default.preprocessing.find((p) => p.id === "fk_score");
    if (!fkStep) throw new Error("fk_score preprocessing step not found in purpose config.json");
    return runPreprocessingStep(text, fkStep.implementation.typescript);
  }
  provider;
  constructor(config) {
    super(config);
    this.provider = this.createConfiguredProvider(
      "google" /* Google */,
      STEP.model.name,
      config.googleApiKey
    );
  }
  /**
   * Evaluate purpose complexity for a given text and grade level
   *
   * @param text - The text to evaluate
   * @param grade - The target grade level (3-12)
   * @returns Evaluation result with complexity score and detailed analysis
   * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
   * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
   * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
   */
  async evaluate(text, grade) {
    this.logger.info("Starting Purpose evaluation", {
      evaluator: _PurposeEvaluator.metadata.id,
      operation: "evaluate",
      grade,
      textLength: text.length
    });
    const startTime = Date.now();
    const stageDetails = [];
    try {
      this.validateText(text);
      const gradeNum = this.parseAndValidateGrade(grade);
      const fkScore = _PurposeEvaluator.computeFkScore(text);
      // Prompt placeholders are substituted as strings.
      const inputs = {
        text,
        grade_level: String(gradeNum),
        fk_score: String(fkScore)
      };
      const response = await this.callLLM(inputs);
      const latencyMs = Date.now() - startTime;
      const tokenUsage = {
        input_tokens: response.usage.inputTokens,
        output_tokens: response.usage.outputTokens
      };
      stageDetails.push({
        stage: STEP.id,
        provider: this.provider.label,
        latency_ms: response.latencyMs,
        token_usage: tokenUsage
      });
      const result = {
        score: COMPLEXITY_SCORE_DISPLAY[response.data.complexity_score],
        reasoning: response.data.reasoning,
        metadata: {
          model: this.provider.label,
          processingTimeMs: latencyMs
        },
        _internal: response.data
      };
      // Telemetry is best-effort: not awaited, and a rejection is deliberately ignored.
      this.sendTelemetry({
        status: "success",
        latencyMs,
        textLength: text.length,
        grade: String(gradeNum),
        provider: this.provider.label,
        tokenUsage,
        metadata: { stage_details: stageDetails },
        inputText: text
      }).catch(() => void 0);
      this.logger.info("Purpose evaluation completed successfully", {
        evaluator: _PurposeEvaluator.metadata.id,
        operation: "evaluate",
        grade: gradeNum,
        score: result.score,
        processingTimeMs: latencyMs
      });
      return result;
    } catch (error) {
      const latencyMs = Date.now() - startTime;
      this.logger.error("Purpose evaluation failed", {
        evaluator: _PurposeEvaluator.metadata.id,
        operation: "evaluate",
        grade,
        error: error instanceof Error ? error : void 0,
        processingTimeMs: latencyMs
      });
      // Report only the usage of stages that completed before the failure.
      const tokenUsage = stageDetails.length > 0 ? {
        input_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.input_tokens ?? 0), 0),
        output_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.output_tokens ?? 0), 0)
      } : void 0;
      this.sendTelemetry({
        status: "error",
        latencyMs,
        textLength: text.length,
        grade: String(grade),
        provider: this.provider.label,
        tokenUsage,
        errorCode: error instanceof Error ? error.name : "UnknownError",
        metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
        inputText: text
      }).catch(() => void 0);
      // Validation failures are rethrown unchanged; everything else goes through wrapProviderError.
      if (error instanceof ValidationError) throw error;
      throw wrapProviderError(error, "Purpose evaluation failed");
    }
  }
  /**
   * Parse a grade and check it against the supported range.
   *
   * @param grade - Grade as a string; other values are coerced with `String()` first
   * @returns The grade as an integer between GRADE_MIN and GRADE_MAX inclusive
   * @throws {ValidationError} If the grade is missing, empty, non-integer, or out of range
   */
  parseAndValidateGrade(grade) {
    // Coerce before trimming: calling `.trim()` on a numeric or nullish grade
    // raises a TypeError, which evaluate() would hand to wrapProviderError
    // instead of surfacing the documented ValidationError.
    const raw = grade == null ? "" : String(grade).trim();
    // `Number("")` is 0, so map empty input to NaN explicitly rather than rely on the range check.
    const num = raw === "" ? Number.NaN : Number(raw);
    if (!Number.isInteger(num) || num < GRADE_MIN || num > GRADE_MAX) {
      throw new ValidationError(
        `Invalid grade "${grade}". Purpose evaluator supports integer grades ${GRADE_MIN}\u2013${GRADE_MAX}.`
      );
    }
    return num;
  }
  /**
   * Issue the single structured-output call for this evaluator.
   *
   * @param inputs - Placeholder values (`text`, `grade_level`, `fk_score`) for the prompts
   * @returns The provider response's `data`, `usage` and `latencyMs`
   */
  async callLLM(inputs) {
    const response = await this.provider.generateStructured({
      messages: [
        { role: "system", content: getSystemPrompt5(inputs) },
        { role: "user", content: getUserPrompt5(inputs) }
      ],
      schema: PurposeOutputSchema,
      temperature: _PurposeEvaluator.TEMPERATURE
    });
    return { data: response.data, usage: response.usage, latencyMs: response.latencyMs };
  }
};
2720
/**
 * Convenience wrapper: construct a PurposeEvaluator from `config` and run one evaluation.
 *
 * @param text - The text to evaluate
 * @param grade - The target grade level
 * @param config - Passed to the PurposeEvaluator constructor
 * @returns The promise returned by `PurposeEvaluator#evaluate`
 */
async function evaluatePurpose(text, grade, config) {
  const evaluator = new PurposeEvaluator(config);
  return evaluator.evaluate(text, grade);
}
2723
+
2331
2724
  exports.APIError = APIError;
2332
2725
  exports.AuthenticationError = AuthenticationError;
2333
2726
  exports.ComplexityClassificationSchema = ComplexityClassificationSchema;
@@ -2339,6 +2732,9 @@ exports.GradeLevelAppropriatenessEvaluator = GradeLevelAppropriatenessEvaluator;
2339
2732
  exports.GradeLevelAppropriatenessSchema = GradeLevelAppropriatenessSchema;
2340
2733
  exports.LogLevel = LogLevel;
2341
2734
  exports.NetworkError = NetworkError;
2735
+ exports.Provider = Provider;
2736
+ exports.Providers = Providers;
2737
+ exports.PurposeEvaluator = PurposeEvaluator;
2342
2738
  exports.RateLimitError = RateLimitError;
2343
2739
  exports.SentenceAnalysisSchema = SentenceAnalysisSchema;
2344
2740
  exports.SentenceStructureEvaluator = SentenceStructureEvaluator;
@@ -2353,6 +2749,7 @@ exports.calculateFleschKincaidGrade = calculateFleschKincaidGrade;
2353
2749
  exports.calculateReadabilityMetrics = calculateReadabilityMetrics;
2354
2750
  exports.evaluateConventionality = evaluateConventionality;
2355
2751
  exports.evaluateGradeLevelAppropriateness = evaluateGradeLevelAppropriateness;
2752
+ exports.evaluatePurpose = evaluatePurpose;
2356
2753
  exports.evaluateSentenceStructure = evaluateSentenceStructure;
2357
2754
  exports.evaluateSmk = evaluateSmk;
2358
2755
  exports.evaluateTextComplexity = evaluateTextComplexity;