@learning-commons/evaluators 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/README.md +188 -45
- package/dist/{base-Ced9oKKa.d.cts → base-DKcAYXfb.d.cts} +142 -9
- package/dist/{base-Ced9oKKa.d.ts → base-DKcAYXfb.d.ts} +142 -9
- package/dist/batch/cli.js +635 -227
- package/dist/batch/cli.js.map +1 -1
- package/dist/batch/index.cjs +618 -218
- package/dist/batch/index.cjs.map +1 -1
- package/dist/batch/index.d.cts +3 -1
- package/dist/batch/index.d.ts +3 -1
- package/dist/batch/index.js +617 -218
- package/dist/batch/index.js.map +1 -1
- package/dist/index.cjs +626 -217
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +155 -86
- package/dist/index.d.ts +155 -86
- package/dist/index.js +622 -218
- package/dist/index.js.map +1 -1
- package/package.json +13 -4
- package/src/batch/README.md +14 -1
package/dist/index.cjs
CHANGED
|
@@ -1,20 +1,22 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
var zod = require('zod');
|
|
4
|
+
var ai = require('ai');
|
|
4
5
|
var crypto = require('crypto');
|
|
5
6
|
var fs = require('fs');
|
|
6
7
|
var path = require('path');
|
|
7
8
|
var os = require('os');
|
|
8
9
|
var url = require('url');
|
|
9
|
-
var ai = require('ai');
|
|
10
10
|
var nlp = require('compromise');
|
|
11
11
|
var syllable = require('syllable');
|
|
12
|
+
var textReadability = require('text-readability');
|
|
12
13
|
var pLimit = require('p-limit');
|
|
13
14
|
|
|
14
15
|
var _documentCurrentScript = typeof document !== 'undefined' ? document.currentScript : null;
|
|
15
16
|
function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
|
|
16
17
|
|
|
17
18
|
var nlp__default = /*#__PURE__*/_interopDefault(nlp);
|
|
19
|
+
var textReadability__default = /*#__PURE__*/_interopDefault(textReadability);
|
|
18
20
|
var pLimit__default = /*#__PURE__*/_interopDefault(pLimit);
|
|
19
21
|
|
|
20
22
|
// src/schemas/outputs.ts
|
|
@@ -33,6 +35,7 @@ var GradeLevelAppropriatenessSchema = zod.z.object({
|
|
|
33
35
|
alternative_grade: GradeBand.describe("An alternative grade level for the text"),
|
|
34
36
|
scaffolding_needed: zod.z.string().describe("Scaffolding needed for the text to be appropriate for the alternative grade")
|
|
35
37
|
});
|
|
38
|
+
var PurposeOutputSchema = zod.z.object({ "complexity_score": zod.z.enum(["slightly_complex", "moderately_complex", "very_complex", "exceedingly_complex", "more_context_needed"]).describe("The Purpose complexity level for the target grade."), "reasoning": zod.z.string().describe("A high-level summary of why the text is at this complexity level for the target grade."), "details": zod.z.object({ "detailed_summary": zod.z.array(zod.z.object({ "factor": zod.z.string().describe("The specific text complexity factor identified."), "description": zod.z.string().describe("How this factor manifests in the text."), "effect_on_complexity_dimension": zod.z.string().describe("How this factor affects the reader's ability to understand the text's specific complexity dimension.") }).strict()).describe("Individual complexity factors with descriptions and their effects."), "adjustment_and_scaffolding": zod.z.array(zod.z.object({ "scaffolding_need": zod.z.string().describe("The complexity factor that requires scaffolding."), "suggestion": zod.z.string().describe("A specific instructional strategy to support students with this factor.") }).strict()).describe("Scaffolding strategies to make the text accessible at the target grade."), "recommended_use_cases": zod.z.array(zod.z.object({ "opportunity": zod.z.string().describe("An instructional opportunity related to the text."), "suggestion": zod.z.string().describe("A specific way to leverage this text for that instructional purpose.") }).strict()).describe("Additional instructional opportunities for using this text.") }).strict().describe("Practical instructional details including scaffolding strategies and recommended use cases.") }).strict();
|
|
36
39
|
|
|
37
40
|
// src/errors.ts
|
|
38
41
|
var EvaluatorError = class extends Error {
|
|
@@ -93,8 +96,9 @@ var TimeoutError = class extends APIError {
|
|
|
93
96
|
function parseProviderError(error) {
|
|
94
97
|
if (error instanceof Error) {
|
|
95
98
|
const message = error.message;
|
|
99
|
+
const err = error;
|
|
96
100
|
const statusMatch = message.match(/\b(4\d{2}|5\d{2})\b/);
|
|
97
|
-
const statusCode = statusMatch ? parseInt(statusMatch[1]) : void 0;
|
|
101
|
+
const statusCode = err.statusCode ?? err.status ?? (statusMatch ? parseInt(statusMatch[1]) : void 0);
|
|
98
102
|
return {
|
|
99
103
|
message,
|
|
100
104
|
statusCode,
|
|
@@ -107,6 +111,11 @@ function parseProviderError(error) {
|
|
|
107
111
|
}
|
|
108
112
|
function wrapProviderError(error, defaultMessage = "API request failed") {
|
|
109
113
|
const { message, statusCode, code } = parseProviderError(error);
|
|
114
|
+
if (statusCode === 404 || statusCode === 400 && /\bmodel\b.*(not found|does not exist|invalid)/i.test(message)) {
|
|
115
|
+
return new ConfigurationError(
|
|
116
|
+
`Model not found or invalid: ${message}. Check the model ID passed to the provider.`
|
|
117
|
+
);
|
|
118
|
+
}
|
|
110
119
|
if (statusCode === 401 || statusCode === 403) {
|
|
111
120
|
return new AuthenticationError(
|
|
112
121
|
message.includes("API key") ? message : "Invalid API key",
|
|
@@ -189,6 +198,119 @@ function createLogger(customLogger, level = 2 /* WARN */) {
|
|
|
189
198
|
}
|
|
190
199
|
return new ConsoleLogger(level);
|
|
191
200
|
}
|
|
201
|
+
|
|
202
|
+
// src/providers/base.ts
|
|
203
|
+
var Providers = {
|
|
204
|
+
google: "google",
|
|
205
|
+
openai: "openai",
|
|
206
|
+
anthropic: "anthropic",
|
|
207
|
+
custom: "custom"
|
|
208
|
+
};
|
|
209
|
+
var VercelAIProvider = class {
|
|
210
|
+
constructor(config) {
|
|
211
|
+
this.config = config;
|
|
212
|
+
if (config.type === "custom") {
|
|
213
|
+
throw new Error(
|
|
214
|
+
"VercelAIProvider does not support custom type. Use config.customProvider directly."
|
|
215
|
+
);
|
|
216
|
+
}
|
|
217
|
+
if (!config.model || config.model.trim() === "") {
|
|
218
|
+
throw new Error(
|
|
219
|
+
`model is required for VercelAIProvider (type: "${config.type}"). No default is assumed.`
|
|
220
|
+
);
|
|
221
|
+
}
|
|
222
|
+
this.model = config.model;
|
|
223
|
+
this.label = `${config.type}:${config.model}`;
|
|
224
|
+
}
|
|
225
|
+
label;
|
|
226
|
+
model;
|
|
227
|
+
/**
|
|
228
|
+
* Generate structured output using Vercel AI SDK's generateText with output
|
|
229
|
+
*/
|
|
230
|
+
async generateStructured(request) {
|
|
231
|
+
const model = await this.getModel();
|
|
232
|
+
const startTime = Date.now();
|
|
233
|
+
const { output, usage } = await ai.generateText({
|
|
234
|
+
model,
|
|
235
|
+
messages: request.messages,
|
|
236
|
+
output: ai.Output.object({ schema: request.schema }),
|
|
237
|
+
temperature: request.temperature ?? 0,
|
|
238
|
+
maxRetries: this.config.maxRetries ?? 0,
|
|
239
|
+
...request.maxTokens !== void 0 ? { maxTokens: request.maxTokens } : {}
|
|
240
|
+
});
|
|
241
|
+
return {
|
|
242
|
+
data: output,
|
|
243
|
+
model: this.model,
|
|
244
|
+
usage: {
|
|
245
|
+
inputTokens: usage.inputTokens || 0,
|
|
246
|
+
outputTokens: usage.outputTokens || 0
|
|
247
|
+
},
|
|
248
|
+
latencyMs: Date.now() - startTime
|
|
249
|
+
};
|
|
250
|
+
}
|
|
251
|
+
/**
|
|
252
|
+
* Generate plain text using Vercel AI SDK's generateText
|
|
253
|
+
*/
|
|
254
|
+
async generateText(messages, temperature) {
|
|
255
|
+
const model = await this.getModel();
|
|
256
|
+
const startTime = Date.now();
|
|
257
|
+
const { text, usage } = await ai.generateText({
|
|
258
|
+
model,
|
|
259
|
+
messages,
|
|
260
|
+
temperature: temperature ?? this.config.temperature ?? 0,
|
|
261
|
+
maxRetries: this.config.maxRetries ?? 0
|
|
262
|
+
});
|
|
263
|
+
return {
|
|
264
|
+
text,
|
|
265
|
+
usage: {
|
|
266
|
+
inputTokens: usage.inputTokens || 0,
|
|
267
|
+
outputTokens: usage.outputTokens || 0
|
|
268
|
+
},
|
|
269
|
+
latencyMs: Date.now() - startTime
|
|
270
|
+
};
|
|
271
|
+
}
|
|
272
|
+
/**
|
|
273
|
+
* Get the configured language model.
|
|
274
|
+
* Uses dynamic imports so consumers only need to install the provider packages they use.
|
|
275
|
+
*/
|
|
276
|
+
async getModel() {
|
|
277
|
+
const apiKey = this.config.apiKey;
|
|
278
|
+
switch (this.config.type) {
|
|
279
|
+
case "openai": {
|
|
280
|
+
const { createOpenAI } = await import('@ai-sdk/openai').catch(() => {
|
|
281
|
+
throw new Error(
|
|
282
|
+
"To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai"
|
|
283
|
+
);
|
|
284
|
+
});
|
|
285
|
+
return createOpenAI(apiKey ? { apiKey } : {})(this.model);
|
|
286
|
+
}
|
|
287
|
+
case "anthropic": {
|
|
288
|
+
const { createAnthropic } = await import('@ai-sdk/anthropic').catch(() => {
|
|
289
|
+
throw new Error(
|
|
290
|
+
"To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic"
|
|
291
|
+
);
|
|
292
|
+
});
|
|
293
|
+
return createAnthropic(apiKey ? { apiKey } : {})(this.model);
|
|
294
|
+
}
|
|
295
|
+
case "google": {
|
|
296
|
+
const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch(() => {
|
|
297
|
+
throw new Error(
|
|
298
|
+
"To use the Google provider, install its adapter: npm install @ai-sdk/google"
|
|
299
|
+
);
|
|
300
|
+
});
|
|
301
|
+
return createGoogleGenerativeAI(apiKey ? { apiKey } : {})(this.model);
|
|
302
|
+
}
|
|
303
|
+
default:
|
|
304
|
+
throw new Error(`Unsupported provider type: ${this.config.type}`);
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
};
|
|
308
|
+
function createProvider(config) {
|
|
309
|
+
if (config.type === "custom" && config.customProvider) {
|
|
310
|
+
return config.customProvider;
|
|
311
|
+
}
|
|
312
|
+
return new VercelAIProvider(config);
|
|
313
|
+
}
|
|
192
314
|
var SentenceAnalysisSchema = zod.z.object({
|
|
193
315
|
reasoning: zod.z.string().describe("Step-by-step reasoning for the analysis"),
|
|
194
316
|
// Foundational
|
|
@@ -348,6 +470,12 @@ var VALIDATION_LIMITS = {
|
|
|
348
470
|
/** Maximum text length in characters (100K chars ≈ 25K tokens) */
|
|
349
471
|
MAX_TEXT_LENGTH: 1e5
|
|
350
472
|
};
|
|
473
|
+
var Provider = /* @__PURE__ */ ((Provider2) => {
|
|
474
|
+
Provider2["OpenAI"] = "openai";
|
|
475
|
+
Provider2["Google"] = "google";
|
|
476
|
+
Provider2["Anthropic"] = "anthropic";
|
|
477
|
+
return Provider2;
|
|
478
|
+
})(Provider || {});
|
|
351
479
|
var BaseEvaluator = class {
|
|
352
480
|
telemetryClient;
|
|
353
481
|
logger;
|
|
@@ -365,21 +493,35 @@ var BaseEvaluator = class {
|
|
|
365
493
|
* name: 'My Evaluator',
|
|
366
494
|
* description: 'Does something useful',
|
|
367
495
|
* supportedGrades: ['3', '4', '5'],
|
|
368
|
-
*
|
|
369
|
-
* requiresOpenAIKey: false,
|
|
496
|
+
* defaultProviders: [Provider.Google],
|
|
370
497
|
* };
|
|
371
498
|
* }
|
|
372
499
|
* ```
|
|
373
500
|
*/
|
|
374
501
|
static metadata;
|
|
502
|
+
/**
|
|
503
|
+
* @throws {ConfigurationError} If the subclass has not defined static metadata
|
|
504
|
+
* @throws {ConfigurationError} If modelOverride has an invalid provider or empty model
|
|
505
|
+
* @throws {ConfigurationError} If a required API key is missing
|
|
506
|
+
*/
|
|
375
507
|
constructor(config) {
|
|
376
508
|
this.logger = createLogger(config.logger, config.logLevel ?? 2 /* WARN */);
|
|
509
|
+
this.validateModelOverride(config);
|
|
377
510
|
this.validateApiKeys(config);
|
|
378
511
|
const telemetryConfig = this.normalizeTelemetryConfig(config.telemetry);
|
|
379
512
|
this.config = {
|
|
380
513
|
maxRetries: config.maxRetries ?? 2,
|
|
381
|
-
telemetry: telemetryConfig
|
|
514
|
+
telemetry: telemetryConfig,
|
|
515
|
+
modelOverride: config.modelOverride,
|
|
516
|
+
googleApiKey: config.googleApiKey,
|
|
517
|
+
openaiApiKey: config.openaiApiKey,
|
|
518
|
+
anthropicApiKey: config.anthropicApiKey
|
|
382
519
|
};
|
|
520
|
+
if (config.modelOverride) {
|
|
521
|
+
this.logger.warn(
|
|
522
|
+
`modelOverride is active: using ${config.modelOverride.provider}:${config.modelOverride.model} instead of the default model. Evaluation quality may differ from recommended defaults.`
|
|
523
|
+
);
|
|
524
|
+
}
|
|
383
525
|
if (this.config.telemetry.enabled) {
|
|
384
526
|
this.telemetryClient = new TelemetryClient({
|
|
385
527
|
endpoint: "https://api.learningcommons.org/evaluators-telemetry/v1/events",
|
|
@@ -404,21 +546,62 @@ var BaseEvaluator = class {
|
|
|
404
546
|
return meta;
|
|
405
547
|
}
|
|
406
548
|
/**
|
|
407
|
-
* Validate
|
|
408
|
-
*
|
|
549
|
+
* Validate modelOverride shape: provider must be a known Provider value and
|
|
550
|
+
* model must be a non-empty string.
|
|
551
|
+
* @throws {ConfigurationError} If the override is malformed
|
|
409
552
|
*/
|
|
410
|
-
|
|
411
|
-
if (
|
|
553
|
+
validateModelOverride(config) {
|
|
554
|
+
if (!config.modelOverride) return;
|
|
555
|
+
const validProviders = Object.values(Provider);
|
|
556
|
+
if (!validProviders.includes(config.modelOverride.provider)) {
|
|
412
557
|
throw new ConfigurationError(
|
|
413
|
-
`
|
|
558
|
+
`Invalid provider "${config.modelOverride.provider}" in modelOverride. Valid providers are: ${validProviders.join(", ")}.`
|
|
414
559
|
);
|
|
415
560
|
}
|
|
416
|
-
if (
|
|
561
|
+
if (!config.modelOverride.model || config.modelOverride.model.trim() === "") {
|
|
417
562
|
throw new ConfigurationError(
|
|
418
|
-
`
|
|
563
|
+
`modelOverride.model is required. Specify the model ID for provider "${config.modelOverride.provider}".`
|
|
419
564
|
);
|
|
420
565
|
}
|
|
421
566
|
}
|
|
567
|
+
/**
|
|
568
|
+
* Validate that the required API key is present.
|
|
569
|
+
* When modelOverride is set, checks the override provider's key.
|
|
570
|
+
* Otherwise checks the keys required by the evaluator's default providers.
|
|
571
|
+
* @throws {ConfigurationError} If a required key is missing
|
|
572
|
+
*/
|
|
573
|
+
validateApiKeys(config) {
|
|
574
|
+
const keyFor = {
|
|
575
|
+
["openai" /* OpenAI */]: config.openaiApiKey?.trim() || void 0,
|
|
576
|
+
["google" /* Google */]: config.googleApiKey?.trim() || void 0,
|
|
577
|
+
["anthropic" /* Anthropic */]: config.anthropicApiKey?.trim() || void 0
|
|
578
|
+
};
|
|
579
|
+
const humanName = {
|
|
580
|
+
["openai" /* OpenAI */]: "OpenAI API key",
|
|
581
|
+
["google" /* Google */]: "Google API key",
|
|
582
|
+
["anthropic" /* Anthropic */]: "Anthropic API key"
|
|
583
|
+
};
|
|
584
|
+
const configKey = {
|
|
585
|
+
["openai" /* OpenAI */]: "openaiApiKey",
|
|
586
|
+
["google" /* Google */]: "googleApiKey",
|
|
587
|
+
["anthropic" /* Anthropic */]: "anthropicApiKey"
|
|
588
|
+
};
|
|
589
|
+
if (config.modelOverride) {
|
|
590
|
+
if (!keyFor[config.modelOverride.provider]) {
|
|
591
|
+
throw new ConfigurationError(
|
|
592
|
+
`${humanName[config.modelOverride.provider]} is required when using modelOverride with provider "${config.modelOverride.provider}". Pass ${configKey[config.modelOverride.provider]} in config.`
|
|
593
|
+
);
|
|
594
|
+
}
|
|
595
|
+
return;
|
|
596
|
+
}
|
|
597
|
+
for (const provider of this.metadata.defaultProviders) {
|
|
598
|
+
if (!keyFor[provider]) {
|
|
599
|
+
throw new ConfigurationError(
|
|
600
|
+
`${humanName[provider]} is required for ${this.metadata.name} evaluator. Pass ${configKey[provider]} in config.`
|
|
601
|
+
);
|
|
602
|
+
}
|
|
603
|
+
}
|
|
604
|
+
}
|
|
422
605
|
/**
|
|
423
606
|
* Normalize telemetry config to standard format
|
|
424
607
|
*/
|
|
@@ -499,6 +682,33 @@ var BaseEvaluator = class {
|
|
|
499
682
|
);
|
|
500
683
|
}
|
|
501
684
|
}
|
|
685
|
+
/**
|
|
686
|
+
* Create an LLM provider, honouring modelOverride if set.
|
|
687
|
+
* When override is active, the key for the override provider is resolved
|
|
688
|
+
* from the matching top-level config field (e.g. anthropicApiKey for Anthropic).
|
|
689
|
+
*/
|
|
690
|
+
createConfiguredProvider(defaultType, defaultModel, defaultApiKey) {
|
|
691
|
+
const override = this.config.modelOverride;
|
|
692
|
+
if (override) {
|
|
693
|
+
const apiKeyFor = {
|
|
694
|
+
["openai" /* OpenAI */]: this.config.openaiApiKey,
|
|
695
|
+
["google" /* Google */]: this.config.googleApiKey,
|
|
696
|
+
["anthropic" /* Anthropic */]: this.config.anthropicApiKey
|
|
697
|
+
};
|
|
698
|
+
return createProvider({
|
|
699
|
+
type: override.provider,
|
|
700
|
+
model: override.model,
|
|
701
|
+
apiKey: apiKeyFor[override.provider],
|
|
702
|
+
maxRetries: this.config.maxRetries
|
|
703
|
+
});
|
|
704
|
+
}
|
|
705
|
+
return createProvider({
|
|
706
|
+
type: defaultType,
|
|
707
|
+
model: defaultModel,
|
|
708
|
+
apiKey: defaultApiKey,
|
|
709
|
+
maxRetries: this.config.maxRetries
|
|
710
|
+
});
|
|
711
|
+
}
|
|
502
712
|
/**
|
|
503
713
|
* Send telemetry event to analytics service
|
|
504
714
|
* Common helper for all evaluators
|
|
@@ -519,123 +729,12 @@ var BaseEvaluator = class {
|
|
|
519
729
|
provider: params.provider,
|
|
520
730
|
token_usage: params.tokenUsage,
|
|
521
731
|
metadata: params.metadata,
|
|
732
|
+
model_override: this.config.modelOverride ? true : void 0,
|
|
522
733
|
// Include input text only if recording is enabled
|
|
523
734
|
input_text: this.config.telemetry.recordInputs ? params.inputText : void 0
|
|
524
735
|
});
|
|
525
736
|
}
|
|
526
737
|
};
|
|
527
|
-
var DEFAULT_MODELS = {
|
|
528
|
-
openai: "gpt-4o",
|
|
529
|
-
anthropic: "claude-sonnet-4-5-20250929",
|
|
530
|
-
google: "gemini-2.5-pro"
|
|
531
|
-
};
|
|
532
|
-
var VercelAIProvider = class {
|
|
533
|
-
constructor(config) {
|
|
534
|
-
this.config = config;
|
|
535
|
-
if (config.type === "custom") {
|
|
536
|
-
throw new Error(
|
|
537
|
-
"VercelAIProvider does not support custom type. Use config.customProvider directly."
|
|
538
|
-
);
|
|
539
|
-
}
|
|
540
|
-
}
|
|
541
|
-
/**
|
|
542
|
-
* Generate structured output using Vercel AI SDK's generateText with output
|
|
543
|
-
*/
|
|
544
|
-
async generateStructured(request) {
|
|
545
|
-
const model = await this.getModel(request.model);
|
|
546
|
-
const startTime = Date.now();
|
|
547
|
-
const { output, usage } = await ai.generateText({
|
|
548
|
-
model,
|
|
549
|
-
messages: request.messages,
|
|
550
|
-
output: ai.Output.object({ schema: request.schema }),
|
|
551
|
-
temperature: request.temperature ?? 0,
|
|
552
|
-
maxRetries: this.config.maxRetries ?? 0,
|
|
553
|
-
...request.maxTokens !== void 0 ? { maxTokens: request.maxTokens } : {}
|
|
554
|
-
});
|
|
555
|
-
return {
|
|
556
|
-
data: output,
|
|
557
|
-
model: request.model || this.getDefaultModel(),
|
|
558
|
-
usage: {
|
|
559
|
-
inputTokens: usage.inputTokens || 0,
|
|
560
|
-
outputTokens: usage.outputTokens || 0
|
|
561
|
-
},
|
|
562
|
-
latencyMs: Date.now() - startTime
|
|
563
|
-
};
|
|
564
|
-
}
|
|
565
|
-
/**
|
|
566
|
-
* Generate plain text using Vercel AI SDK's generateText
|
|
567
|
-
*/
|
|
568
|
-
async generateText(messages, temperature) {
|
|
569
|
-
const model = await this.getModel();
|
|
570
|
-
const startTime = Date.now();
|
|
571
|
-
const { text, usage } = await ai.generateText({
|
|
572
|
-
model,
|
|
573
|
-
messages,
|
|
574
|
-
temperature: temperature ?? this.config.temperature ?? 0,
|
|
575
|
-
maxRetries: this.config.maxRetries ?? 0
|
|
576
|
-
});
|
|
577
|
-
return {
|
|
578
|
-
text,
|
|
579
|
-
usage: {
|
|
580
|
-
inputTokens: usage.inputTokens || 0,
|
|
581
|
-
outputTokens: usage.outputTokens || 0
|
|
582
|
-
},
|
|
583
|
-
latencyMs: Date.now() - startTime
|
|
584
|
-
};
|
|
585
|
-
}
|
|
586
|
-
/**
|
|
587
|
-
* Get the configured language model.
|
|
588
|
-
* Uses dynamic imports so consumers only need to install the provider packages they use.
|
|
589
|
-
*/
|
|
590
|
-
async getModel(requestModel) {
|
|
591
|
-
const modelId = requestModel || this.config.model || this.getDefaultModel();
|
|
592
|
-
const apiKey = this.config.apiKey;
|
|
593
|
-
switch (this.config.type) {
|
|
594
|
-
case "openai": {
|
|
595
|
-
const { createOpenAI } = await import('@ai-sdk/openai').catch(() => {
|
|
596
|
-
throw new Error(
|
|
597
|
-
"To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai"
|
|
598
|
-
);
|
|
599
|
-
});
|
|
600
|
-
return createOpenAI(apiKey ? { apiKey } : {})(modelId);
|
|
601
|
-
}
|
|
602
|
-
case "anthropic": {
|
|
603
|
-
const { createAnthropic } = await import('@ai-sdk/anthropic').catch(() => {
|
|
604
|
-
throw new Error(
|
|
605
|
-
"To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic"
|
|
606
|
-
);
|
|
607
|
-
});
|
|
608
|
-
return createAnthropic(apiKey ? { apiKey } : {})(modelId);
|
|
609
|
-
}
|
|
610
|
-
case "google": {
|
|
611
|
-
const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch(() => {
|
|
612
|
-
throw new Error(
|
|
613
|
-
"To use the Google provider, install its adapter: npm install @ai-sdk/google"
|
|
614
|
-
);
|
|
615
|
-
});
|
|
616
|
-
return createGoogleGenerativeAI(apiKey ? { apiKey } : {})(modelId);
|
|
617
|
-
}
|
|
618
|
-
default:
|
|
619
|
-
throw new Error(`Unsupported provider type: ${this.config.type}`);
|
|
620
|
-
}
|
|
621
|
-
}
|
|
622
|
-
/**
|
|
623
|
-
* Get default model for the configured provider
|
|
624
|
-
*/
|
|
625
|
-
getDefaultModel() {
|
|
626
|
-
const providerType = this.config.type;
|
|
627
|
-
if (providerType === "custom") {
|
|
628
|
-
throw new Error("Cannot get default model for custom provider type");
|
|
629
|
-
}
|
|
630
|
-
return DEFAULT_MODELS[providerType];
|
|
631
|
-
}
|
|
632
|
-
};
|
|
633
|
-
function createProvider(config) {
|
|
634
|
-
if (config.type === "custom" && config.customProvider) {
|
|
635
|
-
return config.customProvider;
|
|
636
|
-
}
|
|
637
|
-
return new VercelAIProvider(config);
|
|
638
|
-
}
|
|
639
738
|
var VocabularyComplexitySchema = zod.z.object({
|
|
640
739
|
tier_2_words: zod.z.string().describe("List of Tier 2 words (academic words)"),
|
|
641
740
|
tier_3_words: zod.z.string().describe("List of Tier 3 words (domain-specific)"),
|
|
@@ -829,6 +928,44 @@ function featuresToJSON(features, decimals = 1, castToInt = true) {
|
|
|
829
928
|
}
|
|
830
929
|
return JSON.stringify(payload, null, 2);
|
|
831
930
|
}
|
|
931
|
+
var LIBRARY_ADAPTERS = {
|
|
932
|
+
"text-readability": {
|
|
933
|
+
call(fnName, text) {
|
|
934
|
+
const fn = textReadability__default.default[fnName];
|
|
935
|
+
if (typeof fn !== "function") {
|
|
936
|
+
throw new Error(`Function "${fnName}" not found in text-readability.`);
|
|
937
|
+
}
|
|
938
|
+
return fn.call(textReadability__default.default, text);
|
|
939
|
+
}
|
|
940
|
+
}
|
|
941
|
+
};
|
|
942
|
+
var POST_TRANSFORMS = {
|
|
943
|
+
round(value, { precision = 0 }) {
|
|
944
|
+
const factor = 10 ** precision;
|
|
945
|
+
return Math.round(value * factor) / factor;
|
|
946
|
+
}
|
|
947
|
+
};
|
|
948
|
+
function runPreprocessingStep(text, impl) {
|
|
949
|
+
const adapter = LIBRARY_ADAPTERS[impl.library];
|
|
950
|
+
if (!adapter) {
|
|
951
|
+
const supported = Object.keys(LIBRARY_ADAPTERS).join(", ");
|
|
952
|
+
throw new Error(
|
|
953
|
+
`Unsupported preprocessing library "${impl.library}". Supported: ${supported}.`
|
|
954
|
+
);
|
|
955
|
+
}
|
|
956
|
+
let result = adapter.call(impl.function, text);
|
|
957
|
+
if (impl.post_transform) {
|
|
958
|
+
const transform = POST_TRANSFORMS[impl.post_transform.type];
|
|
959
|
+
if (!transform) {
|
|
960
|
+
const supported = Object.keys(POST_TRANSFORMS).join(", ");
|
|
961
|
+
throw new Error(
|
|
962
|
+
`Unsupported post_transform type "${impl.post_transform.type}". Supported: ${supported}.`
|
|
963
|
+
);
|
|
964
|
+
}
|
|
965
|
+
result = transform(result, impl.post_transform);
|
|
966
|
+
}
|
|
967
|
+
return result;
|
|
968
|
+
}
|
|
832
969
|
|
|
833
970
|
// ../../evals/prompts/vocabulary/background-knowledge.txt
|
|
834
971
|
var background_knowledge_default = `
|
|
@@ -1134,32 +1271,28 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1134
1271
|
name: "Vocabulary",
|
|
1135
1272
|
description: "Evaluates vocabulary complexity of educational texts relative to grade level",
|
|
1136
1273
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
1137
|
-
|
|
1138
|
-
requiresOpenAIKey: true
|
|
1274
|
+
defaultProviders: ["google" /* Google */, "openai" /* OpenAI */]
|
|
1139
1275
|
};
|
|
1140
1276
|
grades34ComplexityProvider;
|
|
1141
1277
|
otherGradesComplexityProvider;
|
|
1142
1278
|
backgroundKnowledgeProvider;
|
|
1143
1279
|
constructor(config) {
|
|
1144
1280
|
super(config);
|
|
1145
|
-
this.grades34ComplexityProvider =
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
apiKey: config.openaiApiKey,
|
|
1161
|
-
maxRetries: this.config.maxRetries
|
|
1162
|
-
});
|
|
1281
|
+
this.grades34ComplexityProvider = this.createConfiguredProvider(
|
|
1282
|
+
"google" /* Google */,
|
|
1283
|
+
"gemini-2.5-pro",
|
|
1284
|
+
config.googleApiKey
|
|
1285
|
+
);
|
|
1286
|
+
this.otherGradesComplexityProvider = this.createConfiguredProvider(
|
|
1287
|
+
"openai" /* OpenAI */,
|
|
1288
|
+
"gpt-4.1-2025-04-14",
|
|
1289
|
+
config.openaiApiKey
|
|
1290
|
+
);
|
|
1291
|
+
this.backgroundKnowledgeProvider = this.createConfiguredProvider(
|
|
1292
|
+
"openai" /* OpenAI */,
|
|
1293
|
+
"gpt-4o-2024-11-20",
|
|
1294
|
+
config.openaiApiKey
|
|
1295
|
+
);
|
|
1163
1296
|
}
|
|
1164
1297
|
/**
|
|
1165
1298
|
* Evaluate vocabulary complexity for a given text and grade level
|
|
@@ -1168,6 +1301,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1168
1301
|
* @param grade - The target grade level (3-12)
|
|
1169
1302
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
1170
1303
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
1304
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1171
1305
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1172
1306
|
*/
|
|
1173
1307
|
async evaluate(text, grade) {
|
|
@@ -1179,7 +1313,9 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1179
1313
|
});
|
|
1180
1314
|
const startTime = Date.now();
|
|
1181
1315
|
const stageDetails = [];
|
|
1182
|
-
const
|
|
1316
|
+
const complexityProviderLabel = grade === "3" || grade === "4" ? this.grades34ComplexityProvider.label : this.otherGradesComplexityProvider.label;
|
|
1317
|
+
const backgroundProviderLabel = this.backgroundKnowledgeProvider.label;
|
|
1318
|
+
const modelLabel = this.config.modelOverride ? backgroundProviderLabel : `${backgroundProviderLabel}+${complexityProviderLabel}`;
|
|
1183
1319
|
try {
|
|
1184
1320
|
this.validateText(text);
|
|
1185
1321
|
this.validateGrade(grade, new Set(_VocabularyEvaluator.metadata.supportedGrades));
|
|
@@ -1190,7 +1326,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1190
1326
|
const bgResponse = await this.getBackgroundKnowledgeAssumption(text, grade);
|
|
1191
1327
|
stageDetails.push({
|
|
1192
1328
|
stage: "background_knowledge",
|
|
1193
|
-
provider:
|
|
1329
|
+
provider: backgroundProviderLabel,
|
|
1194
1330
|
latency_ms: bgResponse.latencyMs,
|
|
1195
1331
|
token_usage: {
|
|
1196
1332
|
input_tokens: bgResponse.usage.inputTokens,
|
|
@@ -1206,7 +1342,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1206
1342
|
);
|
|
1207
1343
|
stageDetails.push({
|
|
1208
1344
|
stage: "complexity_evaluation",
|
|
1209
|
-
provider:
|
|
1345
|
+
provider: complexityProviderLabel,
|
|
1210
1346
|
latency_ms: complexityResponse.latencyMs,
|
|
1211
1347
|
token_usage: {
|
|
1212
1348
|
input_tokens: complexityResponse.usage.inputTokens,
|
|
@@ -1222,8 +1358,10 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1222
1358
|
score: complexityResponse.data.complexity_score,
|
|
1223
1359
|
reasoning: complexityResponse.data.reasoning,
|
|
1224
1360
|
metadata: {
|
|
1225
|
-
model:
|
|
1226
|
-
processingTimeMs: latencyMs
|
|
1361
|
+
model: modelLabel,
|
|
1362
|
+
processingTimeMs: latencyMs,
|
|
1363
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
1364
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
1227
1365
|
},
|
|
1228
1366
|
_internal: complexityResponse.data
|
|
1229
1367
|
};
|
|
@@ -1232,7 +1370,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1232
1370
|
latencyMs,
|
|
1233
1371
|
textLength: text.length,
|
|
1234
1372
|
grade,
|
|
1235
|
-
provider:
|
|
1373
|
+
provider: modelLabel,
|
|
1236
1374
|
tokenUsage: totalTokenUsage,
|
|
1237
1375
|
metadata: {
|
|
1238
1376
|
stage_details: stageDetails
|
|
@@ -1267,7 +1405,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1267
1405
|
latencyMs,
|
|
1268
1406
|
textLength: text.length,
|
|
1269
1407
|
grade,
|
|
1270
|
-
provider:
|
|
1408
|
+
provider: modelLabel,
|
|
1271
1409
|
tokenUsage: totalTokenUsage,
|
|
1272
1410
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1273
1411
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -1438,25 +1576,12 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1438
1576
|
name: "Sentence Structure",
|
|
1439
1577
|
description: "Evaluates sentence structure complexity based on grammatical features",
|
|
1440
1578
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
1441
|
-
|
|
1442
|
-
requiresOpenAIKey: true
|
|
1579
|
+
defaultProviders: ["openai" /* OpenAI */]
|
|
1443
1580
|
};
|
|
1444
|
-
|
|
1445
|
-
complexityProvider;
|
|
1581
|
+
provider;
|
|
1446
1582
|
constructor(config) {
|
|
1447
1583
|
super(config);
|
|
1448
|
-
this.
|
|
1449
|
-
type: "openai",
|
|
1450
|
-
model: "gpt-4o",
|
|
1451
|
-
apiKey: config.openaiApiKey,
|
|
1452
|
-
maxRetries: this.config.maxRetries
|
|
1453
|
-
});
|
|
1454
|
-
this.complexityProvider = createProvider({
|
|
1455
|
-
type: "openai",
|
|
1456
|
-
model: "gpt-4o",
|
|
1457
|
-
apiKey: config.openaiApiKey,
|
|
1458
|
-
maxRetries: this.config.maxRetries
|
|
1459
|
-
});
|
|
1584
|
+
this.provider = this.createConfiguredProvider("openai" /* OpenAI */, "gpt-4o", config.openaiApiKey);
|
|
1460
1585
|
}
|
|
1461
1586
|
/**
|
|
1462
1587
|
* Evaluate sentence structure complexity for a given text and grade level
|
|
@@ -1465,6 +1590,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1465
1590
|
* @param grade - The target grade level (3-12)
|
|
1466
1591
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
1467
1592
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
1593
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1468
1594
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1469
1595
|
*/
|
|
1470
1596
|
async evaluate(text, grade) {
|
|
@@ -1486,7 +1612,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1486
1612
|
const analysisResponse = await this.analyzeSentenceStructure(text);
|
|
1487
1613
|
stageDetails.push({
|
|
1488
1614
|
stage: "sentence_analysis",
|
|
1489
|
-
provider:
|
|
1615
|
+
provider: this.provider.label,
|
|
1490
1616
|
latency_ms: analysisResponse.latencyMs,
|
|
1491
1617
|
token_usage: {
|
|
1492
1618
|
input_tokens: analysisResponse.usage.inputTokens,
|
|
@@ -1501,7 +1627,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1501
1627
|
const complexityResponse = await this.classifyComplexity(features, grade, text);
|
|
1502
1628
|
stageDetails.push({
|
|
1503
1629
|
stage: "complexity_classification",
|
|
1504
|
-
provider:
|
|
1630
|
+
provider: this.provider.label,
|
|
1505
1631
|
latency_ms: complexityResponse.latencyMs,
|
|
1506
1632
|
token_usage: {
|
|
1507
1633
|
input_tokens: complexityResponse.usage.inputTokens,
|
|
@@ -1517,8 +1643,10 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1517
1643
|
score: complexityResponse.data.answer,
|
|
1518
1644
|
reasoning: complexityResponse.data.reasoning,
|
|
1519
1645
|
metadata: {
|
|
1520
|
-
model:
|
|
1521
|
-
processingTimeMs: latencyMs
|
|
1646
|
+
model: this.provider.label,
|
|
1647
|
+
processingTimeMs: latencyMs,
|
|
1648
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
1649
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
1522
1650
|
},
|
|
1523
1651
|
_internal: {
|
|
1524
1652
|
sentenceAnalysis: analysisResponse.data,
|
|
@@ -1531,7 +1659,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1531
1659
|
latencyMs,
|
|
1532
1660
|
textLength: text.length,
|
|
1533
1661
|
grade,
|
|
1534
|
-
provider:
|
|
1662
|
+
provider: this.provider.label,
|
|
1535
1663
|
tokenUsage: totalTokenUsage,
|
|
1536
1664
|
metadata: {
|
|
1537
1665
|
stage_details: stageDetails
|
|
@@ -1566,7 +1694,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1566
1694
|
latencyMs,
|
|
1567
1695
|
textLength: text.length,
|
|
1568
1696
|
grade,
|
|
1569
|
-
provider:
|
|
1697
|
+
provider: this.provider.label,
|
|
1570
1698
|
tokenUsage: totalTokenUsage,
|
|
1571
1699
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1572
1700
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -1594,7 +1722,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1594
1722
|
`flesch_kincaid_grade: ${metrics.fleschKincaidGrade}`
|
|
1595
1723
|
].join("\n");
|
|
1596
1724
|
const userPrompt = getUserPromptAnalysis(text, gtCountsStr);
|
|
1597
|
-
const response = await this.
|
|
1725
|
+
const response = await this.provider.generateStructured({
|
|
1598
1726
|
messages: [
|
|
1599
1727
|
{ role: "system", content: getSystemPromptAnalysis() },
|
|
1600
1728
|
{ role: "user", content: userPrompt }
|
|
@@ -1616,7 +1744,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1616
1744
|
async classifyComplexity(features, grade, excerpt) {
|
|
1617
1745
|
const featuresJSON = featuresToJSON(features, 1, true);
|
|
1618
1746
|
const userPrompt = getUserPromptComplexity(featuresJSON, grade, excerpt);
|
|
1619
|
-
const response = await this.
|
|
1747
|
+
const response = await this.provider.generateStructured({
|
|
1620
1748
|
messages: [
|
|
1621
1749
|
{ role: "system", content: getSystemPromptComplexity() },
|
|
1622
1750
|
{ role: "user", content: userPrompt }
|
|
@@ -1667,18 +1795,16 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1667
1795
|
description: "Determines appropriate grade level for text with scaffolding recommendations",
|
|
1668
1796
|
supportedGrades: [],
|
|
1669
1797
|
// No grade parameter required - evaluates what grade the text is appropriate for
|
|
1670
|
-
|
|
1671
|
-
requiresOpenAIKey: false
|
|
1798
|
+
defaultProviders: ["google" /* Google */]
|
|
1672
1799
|
};
|
|
1673
1800
|
provider;
|
|
1674
1801
|
constructor(config) {
|
|
1675
1802
|
super(config);
|
|
1676
|
-
this.provider =
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
});
|
|
1803
|
+
this.provider = this.createConfiguredProvider(
|
|
1804
|
+
"google" /* Google */,
|
|
1805
|
+
"gemini-2.5-pro",
|
|
1806
|
+
config.googleApiKey
|
|
1807
|
+
);
|
|
1682
1808
|
}
|
|
1683
1809
|
/**
|
|
1684
1810
|
* Evaluate grade level appropriateness for a given text
|
|
@@ -1686,6 +1812,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1686
1812
|
* @param text - The text to evaluate
|
|
1687
1813
|
* @returns Evaluation result with grade recommendations and scaffolding suggestions
|
|
1688
1814
|
* @throws {ValidationError} If text is empty or too short/long
|
|
1815
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1689
1816
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1690
1817
|
*/
|
|
1691
1818
|
async evaluate(text) {
|
|
@@ -1719,8 +1846,10 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1719
1846
|
score: response.data.grade,
|
|
1720
1847
|
reasoning: response.data.reasoning,
|
|
1721
1848
|
metadata: {
|
|
1722
|
-
model:
|
|
1723
|
-
processingTimeMs: latencyMs
|
|
1849
|
+
model: this.provider.label,
|
|
1850
|
+
processingTimeMs: latencyMs,
|
|
1851
|
+
inputTokens: tokenUsage.input_tokens,
|
|
1852
|
+
outputTokens: tokenUsage.output_tokens
|
|
1724
1853
|
},
|
|
1725
1854
|
_internal: response.data
|
|
1726
1855
|
};
|
|
@@ -1728,7 +1857,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1728
1857
|
status: "success",
|
|
1729
1858
|
latencyMs,
|
|
1730
1859
|
textLength: text.length,
|
|
1731
|
-
provider:
|
|
1860
|
+
provider: this.provider.label,
|
|
1732
1861
|
tokenUsage,
|
|
1733
1862
|
// No metadata.stage_details for single-stage evaluator
|
|
1734
1863
|
inputText: text
|
|
@@ -1753,7 +1882,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1753
1882
|
status: "error",
|
|
1754
1883
|
latencyMs,
|
|
1755
1884
|
textLength: text.length,
|
|
1756
|
-
provider:
|
|
1885
|
+
provider: this.provider.label,
|
|
1757
1886
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1758
1887
|
inputText: text
|
|
1759
1888
|
}).catch(() => {
|
|
@@ -1868,18 +1997,16 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1868
1997
|
name: "Subject Matter Knowledge",
|
|
1869
1998
|
description: "Evaluates background knowledge demands of educational texts relative to grade level",
|
|
1870
1999
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
1871
|
-
|
|
1872
|
-
requiresOpenAIKey: false
|
|
2000
|
+
defaultProviders: ["google" /* Google */]
|
|
1873
2001
|
};
|
|
1874
2002
|
provider;
|
|
1875
2003
|
constructor(config) {
|
|
1876
2004
|
super(config);
|
|
1877
|
-
this.provider =
|
|
1878
|
-
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
});
|
|
2005
|
+
this.provider = this.createConfiguredProvider(
|
|
2006
|
+
"google" /* Google */,
|
|
2007
|
+
"gemini-3-flash-preview",
|
|
2008
|
+
config.googleApiKey
|
|
2009
|
+
);
|
|
1883
2010
|
}
|
|
1884
2011
|
/**
|
|
1885
2012
|
* Evaluate subject matter knowledge complexity for a given text and grade level
|
|
@@ -1888,6 +2015,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1888
2015
|
* @param grade - The target grade level (3-12)
|
|
1889
2016
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
1890
2017
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
2018
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1891
2019
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1892
2020
|
*/
|
|
1893
2021
|
async evaluate(text, grade) {
|
|
@@ -1910,7 +2038,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1910
2038
|
const response = await this.evaluateSmk(text, grade, fkScore);
|
|
1911
2039
|
stageDetails.push({
|
|
1912
2040
|
stage: "smk_evaluation",
|
|
1913
|
-
provider:
|
|
2041
|
+
provider: this.provider.label,
|
|
1914
2042
|
latency_ms: response.latencyMs,
|
|
1915
2043
|
token_usage: {
|
|
1916
2044
|
input_tokens: response.usage.inputTokens,
|
|
@@ -1926,8 +2054,10 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1926
2054
|
score: response.data.complexity_score,
|
|
1927
2055
|
reasoning: response.data.reasoning,
|
|
1928
2056
|
metadata: {
|
|
1929
|
-
model:
|
|
1930
|
-
processingTimeMs: latencyMs
|
|
2057
|
+
model: this.provider.label,
|
|
2058
|
+
processingTimeMs: latencyMs,
|
|
2059
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
2060
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
1931
2061
|
},
|
|
1932
2062
|
_internal: response.data
|
|
1933
2063
|
};
|
|
@@ -1936,7 +2066,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1936
2066
|
latencyMs,
|
|
1937
2067
|
textLength: text.length,
|
|
1938
2068
|
grade,
|
|
1939
|
-
provider:
|
|
2069
|
+
provider: this.provider.label,
|
|
1940
2070
|
tokenUsage: totalTokenUsage,
|
|
1941
2071
|
metadata: {
|
|
1942
2072
|
stage_details: stageDetails
|
|
@@ -1971,7 +2101,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1971
2101
|
latencyMs,
|
|
1972
2102
|
textLength: text.length,
|
|
1973
2103
|
grade,
|
|
1974
|
-
provider:
|
|
2104
|
+
provider: this.provider.label,
|
|
1975
2105
|
tokenUsage: totalTokenUsage,
|
|
1976
2106
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1977
2107
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -2079,18 +2209,16 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2079
2209
|
name: "Conventionality",
|
|
2080
2210
|
description: "Evaluates how explicit, literal, and straightforward a text's meaning is relative to grade level",
|
|
2081
2211
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
2082
|
-
|
|
2083
|
-
requiresOpenAIKey: false
|
|
2212
|
+
defaultProviders: ["google" /* Google */]
|
|
2084
2213
|
};
|
|
2085
2214
|
provider;
|
|
2086
2215
|
constructor(config) {
|
|
2087
2216
|
super(config);
|
|
2088
|
-
this.provider =
|
|
2089
|
-
|
|
2090
|
-
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
});
|
|
2217
|
+
this.provider = this.createConfiguredProvider(
|
|
2218
|
+
"google" /* Google */,
|
|
2219
|
+
"gemini-3-flash-preview",
|
|
2220
|
+
config.googleApiKey
|
|
2221
|
+
);
|
|
2094
2222
|
}
|
|
2095
2223
|
/**
|
|
2096
2224
|
* Evaluate conventionality complexity for a given text and grade level
|
|
@@ -2099,6 +2227,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2099
2227
|
* @param grade - The target grade level (3-12)
|
|
2100
2228
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
2101
2229
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
2230
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
2102
2231
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
2103
2232
|
*/
|
|
2104
2233
|
async evaluate(text, grade) {
|
|
@@ -2121,7 +2250,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2121
2250
|
const response = await this.evaluateConventionality(text, grade, fkScore);
|
|
2122
2251
|
stageDetails.push({
|
|
2123
2252
|
stage: "conventionality_evaluation",
|
|
2124
|
-
provider:
|
|
2253
|
+
provider: this.provider.label,
|
|
2125
2254
|
latency_ms: response.latencyMs,
|
|
2126
2255
|
token_usage: {
|
|
2127
2256
|
input_tokens: response.usage.inputTokens,
|
|
@@ -2137,8 +2266,10 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2137
2266
|
score: response.data.complexity_score,
|
|
2138
2267
|
reasoning: response.data.reasoning,
|
|
2139
2268
|
metadata: {
|
|
2140
|
-
model:
|
|
2141
|
-
processingTimeMs: latencyMs
|
|
2269
|
+
model: this.provider.label,
|
|
2270
|
+
processingTimeMs: latencyMs,
|
|
2271
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
2272
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
2142
2273
|
},
|
|
2143
2274
|
_internal: response.data
|
|
2144
2275
|
};
|
|
@@ -2147,7 +2278,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2147
2278
|
latencyMs,
|
|
2148
2279
|
textLength: text.length,
|
|
2149
2280
|
grade,
|
|
2150
|
-
provider:
|
|
2281
|
+
provider: this.provider.label,
|
|
2151
2282
|
tokenUsage: totalTokenUsage,
|
|
2152
2283
|
metadata: {
|
|
2153
2284
|
stage_details: stageDetails
|
|
@@ -2182,7 +2313,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2182
2313
|
latencyMs,
|
|
2183
2314
|
textLength: text.length,
|
|
2184
2315
|
grade,
|
|
2185
|
-
provider:
|
|
2316
|
+
provider: this.provider.label,
|
|
2186
2317
|
tokenUsage: totalTokenUsage,
|
|
2187
2318
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
2188
2319
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -2224,8 +2355,7 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
|
|
|
2224
2355
|
name: "Text Complexity",
|
|
2225
2356
|
description: "Composite evaluator analyzing vocabulary, sentence structure, subject matter knowledge, and conventionality complexity",
|
|
2226
2357
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
2227
|
-
|
|
2228
|
-
requiresOpenAIKey: true
|
|
2358
|
+
defaultProviders: ["google" /* Google */, "openai" /* OpenAI */]
|
|
2229
2359
|
};
|
|
2230
2360
|
vocabularyEvaluator;
|
|
2231
2361
|
sentenceStructureEvaluator;
|
|
@@ -2250,7 +2380,8 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
|
|
|
2250
2380
|
* @param text - The text to evaluate
|
|
2251
2381
|
* @param grade - The target grade level (3-12)
|
|
2252
2382
|
* @returns Map of sub-evaluator results
|
|
2253
|
-
* @throws {ValidationError} If text is empty or grade is invalid
|
|
2383
|
+
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
2384
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
2254
2385
|
* @throws {Error} If all sub-evaluators fail
|
|
2255
2386
|
*/
|
|
2256
2387
|
async evaluate(text, grade) {
|
|
@@ -2297,7 +2428,7 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
|
|
|
2297
2428
|
latencyMs,
|
|
2298
2429
|
textLength: text.length,
|
|
2299
2430
|
grade,
|
|
2300
|
-
provider: "composite:google+openai",
|
|
2431
|
+
provider: this.config.modelOverride ? `${this.config.modelOverride.provider}:${this.config.modelOverride.model}` : "composite:google+openai",
|
|
2301
2432
|
errorCode: hasFailures ? "PartialFailure" : void 0,
|
|
2302
2433
|
inputText: text
|
|
2303
2434
|
}).catch(() => {
|
|
@@ -2328,6 +2459,280 @@ async function evaluateTextComplexity(text, grade, config) {
|
|
|
2328
2459
|
return evaluator.evaluate(text, grade);
|
|
2329
2460
|
}
|
|
2330
2461
|
|
|
2462
|
+
// ../../evals/prompts/purpose/system.txt
|
|
2463
|
+
var system_default4 = '\n Role\n You are an expert reading assessment evaluator. Your task is to determine the Text Complexity of a given passage based exclusively on the Purpose dimension of the qualitative measures rubric.\n\n Task Details\n You will be provided with an informational or literary `text`, along with its `grade_level` and `fk_score` (Flesch-Kincaid). You must analyze the text and determine how difficult it is for a reader to identify the author\'s purpose. \n\n Crucially, you must distinguish between the text\'s *topic* (what it is about) and its *purpose* (why the author wrote it). \n\n Rubric: Purpose Complexity\n Exceedingly Complex: Subtle and intricate, difficult to determine; includes many theoretical or abstract elements.\n Very Complex: Implicit or subtle but fairly easy to infer; more theoretical or abstract than concrete.\n Moderately Complex: Implied but easy to identify based upon context or source.\n Slightly Complex: Explicitly stated, clear, concrete, narrowly focused.\n More Context Needed: The text is a fragment or lacks necessary introductory context, making the true purpose impossible to determine accurately without external background knowledge.\n\n Expert Rules for Evaluating Purpose\n Based on expert consensus and historical grading corrections, you must apply the following heuristics:\n\n 1. The "Slightly Complex" Benchmark (Straightforward and Explicit)\n A text is Slightly Complex if its purpose is explicitly stated or if its informative intent is straightforward, clear, concrete, and directly answers what the text is immediately about. If the text opens by clearly identifying a concrete topic (e.g., "Pins are made of either brass or iron wire") and rigidly follows through by explaining factual, practical information or a process (like manufacturing steps or geographic facts), the purpose is considered explicit and straightforward. It does *not* require a literal statement like "The purpose of this text is to..." as long as the delivery of information is direct, clear, and unadorned by persuasive elements or complex framing.\n\n 2. Moderately Complex via Guiding Questions & Inquiry Formats\n If a text begins with a general introduction and uses guiding questions (e.g., "Have you ever wondered how clouds are formed?") to transition into an explanation, the purpose is implied rather than explicitly stated upfront. Because the reader must recognize the question as the pivot point for the author\'s intent, it is Moderately Complex.\n\n 3. Moderately Complex via Multiple Distinct Informational Goals\n If a text covers a broad topic but jumps between several distinct scientific or informational objectives without an overarching framing device or explicit thesis (e.g., talking about measuring ice sheets, then mapping, then finding meteorites), the reader must synthesize these diverse facts to recognize the broader purpose, making it Moderately Complex.\n\n 4. Moderately Complex via Arguments Disguised as Information\n If an author is arguing a specific point, correcting a misconception, or defending a stance, but the text could initially be mistaken by students as purely informative factual text, it is Moderately Complex. The reader must infer the persuasive intent or argumentative purpose beneath the informative tone.\n\n 5. "More Context Needed" for Fragments\n If a text is a fragment missing a crucial introduction or context, and identifying the author\'s purpose beyond a simple surface-level description would be exceptionally difficult for a reader in the target grade level without that external background, score it as `more_context_needed`. \n\n Output Format\n Provide your evaluation in the following structure:\n reasoning:\n - Surface Analysis: Identify if the text clearly identifies its topic and delivers straightforward facts, or if it utilizes structural cues, titles, or direct thesis statements.\n - Subtlety & Framing: Is the informative purpose straightforward and concrete? Does it use guiding questions? Is it an argument disguised as pure information? Are there multiple distinct informational goals requiring synthesis?\n - Context Check: Is this text a fragment missing crucial context that obscures the deeper purpose for the target grade level?\n - Rubric Alignment: Explain how the text aligns with the specific language of the rubric, explicitly referencing the expert rules above. Justify why it isn\'t one level simpler or more complex.\n\n answer:\n - complexity_score: (slightly_complex, moderately_complex, very_complex, exceedingly_complex, more_context_needed)\n - reasoning: A brief summary of your final decision.\n - details: Structured breakdown of PurposeDetails including detailed_summary, adjustment_and_scaffolding, and recommended_use_cases.\n';
|
|
2464
|
+
|
|
2465
|
+
// ../../evals/prompts/purpose/user.txt
|
|
2466
|
+
var user_default4 = "Analyze:\nText: {text}\nGrade: {grade_level}\nFK Score: {fk_score}";
|
|
2467
|
+
|
|
2468
|
+
// ../../evals/prompts/purpose/config.json
|
|
2469
|
+
var config_default = {
|
|
2470
|
+
evaluator: {
|
|
2471
|
+
id: "literacy.gla.purpose",
|
|
2472
|
+
name: "Purpose Dimension Text Complexity Evaluator",
|
|
2473
|
+
description: "Evaluates the Purpose dimension of qualitative text complexity for K-12 reading assessment, producing a 5-level rubric rating with structured pedagogical detail."
|
|
2474
|
+
},
|
|
2475
|
+
preprocessing: [
|
|
2476
|
+
{
|
|
2477
|
+
id: "fk_score",
|
|
2478
|
+
kind: "flesch_kincaid_grade",
|
|
2479
|
+
description: "Compute the Flesch-Kincaid Grade Level for the input text and bind it to {fk_score} in the prompt.",
|
|
2480
|
+
input: "text",
|
|
2481
|
+
output: "fk_score",
|
|
2482
|
+
implementation: {
|
|
2483
|
+
python: {
|
|
2484
|
+
library: "textstat",
|
|
2485
|
+
function: "flesch_kincaid_grade",
|
|
2486
|
+
post_transform: {
|
|
2487
|
+
type: "round",
|
|
2488
|
+
precision: 2
|
|
2489
|
+
}
|
|
2490
|
+
},
|
|
2491
|
+
typescript: {
|
|
2492
|
+
library: "text-readability",
|
|
2493
|
+
function: "fleschKincaidGrade",
|
|
2494
|
+
post_transform: {
|
|
2495
|
+
type: "round",
|
|
2496
|
+
precision: 2
|
|
2497
|
+
}
|
|
2498
|
+
}
|
|
2499
|
+
}
|
|
2500
|
+
}
|
|
2501
|
+
],
|
|
2502
|
+
steps: [
|
|
2503
|
+
{
|
|
2504
|
+
id: "evaluate_purpose",
|
|
2505
|
+
description: "Single-call LLM step that produces the EvaluatorOutput JSON.",
|
|
2506
|
+
prompt: {
|
|
2507
|
+
type: "chat",
|
|
2508
|
+
messages: [
|
|
2509
|
+
{
|
|
2510
|
+
role: "system",
|
|
2511
|
+
source_path: "system.txt",
|
|
2512
|
+
sha256: "745b95b7d54dc845b99363c9d3360355381883c22a5f6a0f305d7349cae38a54"
|
|
2513
|
+
},
|
|
2514
|
+
{
|
|
2515
|
+
role: "user",
|
|
2516
|
+
source_path: "user.txt",
|
|
2517
|
+
sha256: "cd8e6347db1a55d104e34436f8f66e833bd6583645d4786a554aaefdd26479b2"
|
|
2518
|
+
}
|
|
2519
|
+
],
|
|
2520
|
+
placeholders: {
|
|
2521
|
+
text: {
|
|
2522
|
+
required: true,
|
|
2523
|
+
source: "input"
|
|
2524
|
+
},
|
|
2525
|
+
grade_level: {
|
|
2526
|
+
required: true,
|
|
2527
|
+
source: "input"
|
|
2528
|
+
},
|
|
2529
|
+
fk_score: {
|
|
2530
|
+
required: true,
|
|
2531
|
+
source: "preprocessing.fk_score"
|
|
2532
|
+
}
|
|
2533
|
+
}
|
|
2534
|
+
},
|
|
2535
|
+
model: {
|
|
2536
|
+
provider: "google",
|
|
2537
|
+
name: "gemini-3-flash-preview"
|
|
2538
|
+
},
|
|
2539
|
+
generation: {
|
|
2540
|
+
temperature: 0
|
|
2541
|
+
},
|
|
2542
|
+
parser: {
|
|
2543
|
+
kind: "structured_output"
|
|
2544
|
+
},
|
|
2545
|
+
output_binding: "formatted_output"
|
|
2546
|
+
}
|
|
2547
|
+
]};
|
|
2548
|
+
|
|
2549
|
+
// src/prompts/purpose/index.ts
|
|
2550
|
+
var STEP_ID = `evaluate_${config_default.evaluator.id.split(".").pop()}`;
|
|
2551
|
+
var _step = config_default.steps.find((s) => s.id === STEP_ID);
|
|
2552
|
+
if (!_step) throw new Error(`Step "${STEP_ID}" not found in purpose config.json`);
|
|
2553
|
+
var PLACEHOLDER_KEYS = Object.keys(_step.prompt.placeholders);
|
|
2554
|
+
function applyPlaceholders(template, inputs) {
|
|
2555
|
+
return PLACEHOLDER_KEYS.reduce(
|
|
2556
|
+
(text, key) => key in inputs ? text.replaceAll(`{${key}}`, inputs[key]) : text,
|
|
2557
|
+
template
|
|
2558
|
+
);
|
|
2559
|
+
}
|
|
2560
|
+
function getSystemPrompt5(inputs) {
|
|
2561
|
+
return applyPlaceholders(system_default4, inputs);
|
|
2562
|
+
}
|
|
2563
|
+
function getUserPrompt5(inputs) {
|
|
2564
|
+
return applyPlaceholders(user_default4, inputs);
|
|
2565
|
+
}
|
|
2566
|
+
|
|
2567
|
+
// ../../evals/prompts/purpose/input_schema.json
|
|
2568
|
+
var input_schema_default = {
|
|
2569
|
+
properties: {
|
|
2570
|
+
grade_level: {
|
|
2571
|
+
minimum: 3,
|
|
2572
|
+
maximum: 12}
|
|
2573
|
+
}
|
|
2574
|
+
};
|
|
2575
|
+
|
|
2576
|
+
// src/evaluators/purpose.ts
|
|
2577
|
+
var STEP_ID2 = `evaluate_${config_default.evaluator.id.split(".").pop()}`;
|
|
2578
|
+
var _step2 = config_default.steps.find((s) => s.id === STEP_ID2);
|
|
2579
|
+
if (!_step2) throw new Error(`Step "${STEP_ID2}" not found in purpose config.json`);
|
|
2580
|
+
var STEP = _step2;
|
|
2581
|
+
var GRADE_MIN = input_schema_default.properties.grade_level.minimum;
|
|
2582
|
+
var GRADE_MAX = input_schema_default.properties.grade_level.maximum;
|
|
2583
|
+
var SUPPORTED_GRADES = Array.from({ length: GRADE_MAX - GRADE_MIN + 1 }, (_, i) => String(GRADE_MIN + i));
|
|
2584
|
+
var COMPLEXITY_SCORE_DISPLAY = {
|
|
2585
|
+
"slightly_complex": "Slightly complex",
|
|
2586
|
+
"moderately_complex": "Moderately complex",
|
|
2587
|
+
"very_complex": "Very complex",
|
|
2588
|
+
"exceedingly_complex": "Exceedingly complex",
|
|
2589
|
+
"more_context_needed": "More context needed"
|
|
2590
|
+
};
|
|
2591
|
+
var PurposeEvaluator = class _PurposeEvaluator extends BaseEvaluator {
|
|
2592
|
+
static metadata = {
|
|
2593
|
+
id: config_default.evaluator.id,
|
|
2594
|
+
name: config_default.evaluator.name,
|
|
2595
|
+
description: config_default.evaluator.description,
|
|
2596
|
+
supportedGrades: SUPPORTED_GRADES,
|
|
2597
|
+
defaultProviders: ["google" /* Google */]
|
|
2598
|
+
};
|
|
2599
|
+
static TEMPERATURE = STEP.generation.temperature;
|
|
2600
|
+
static computeFkScore(text) {
|
|
2601
|
+
const fkStep = config_default.preprocessing.find((p) => p.id === "fk_score");
|
|
2602
|
+
if (!fkStep) throw new Error("fk_score preprocessing step not found in purpose config.json");
|
|
2603
|
+
return runPreprocessingStep(text, fkStep.implementation.typescript);
|
|
2604
|
+
}
|
|
2605
|
+
provider;
|
|
2606
|
+
constructor(config) {
|
|
2607
|
+
super(config);
|
|
2608
|
+
this.provider = this.createConfiguredProvider(
|
|
2609
|
+
"google" /* Google */,
|
|
2610
|
+
STEP.model.name,
|
|
2611
|
+
config.googleApiKey
|
|
2612
|
+
);
|
|
2613
|
+
}
|
|
2614
|
+
/**
|
|
2615
|
+
* Evaluate purpose complexity for a given text and grade level
|
|
2616
|
+
*
|
|
2617
|
+
* @param text - The text to evaluate
|
|
2618
|
+
* @param grade - The target grade level (3-12)
|
|
2619
|
+
* @returns Evaluation result with complexity score and detailed analysis
|
|
2620
|
+
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
2621
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
2622
|
+
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
2623
|
+
*/
|
|
2624
|
+
async evaluate(text, grade) {
|
|
2625
|
+
this.logger.info("Starting Purpose evaluation", {
|
|
2626
|
+
evaluator: _PurposeEvaluator.metadata.id,
|
|
2627
|
+
operation: "evaluate",
|
|
2628
|
+
grade,
|
|
2629
|
+
textLength: text.length
|
|
2630
|
+
});
|
|
2631
|
+
const startTime = Date.now();
|
|
2632
|
+
const stageDetails = [];
|
|
2633
|
+
try {
|
|
2634
|
+
this.validateText(text);
|
|
2635
|
+
const gradeNum = this.parseAndValidateGrade(grade);
|
|
2636
|
+
const fkScore = _PurposeEvaluator.computeFkScore(text);
|
|
2637
|
+
const inputs = {
|
|
2638
|
+
text,
|
|
2639
|
+
grade_level: String(gradeNum),
|
|
2640
|
+
fk_score: String(fkScore)
|
|
2641
|
+
};
|
|
2642
|
+
const response = await this.callLLM(inputs);
|
|
2643
|
+
const latencyMs = Date.now() - startTime;
|
|
2644
|
+
const tokenUsage = {
|
|
2645
|
+
input_tokens: response.usage.inputTokens,
|
|
2646
|
+
output_tokens: response.usage.outputTokens
|
|
2647
|
+
};
|
|
2648
|
+
stageDetails.push({
|
|
2649
|
+
stage: STEP.id,
|
|
2650
|
+
provider: this.provider.label,
|
|
2651
|
+
latency_ms: response.latencyMs,
|
|
2652
|
+
token_usage: tokenUsage
|
|
2653
|
+
});
|
|
2654
|
+
const result = {
|
|
2655
|
+
score: COMPLEXITY_SCORE_DISPLAY[response.data.complexity_score],
|
|
2656
|
+
reasoning: response.data.reasoning,
|
|
2657
|
+
metadata: {
|
|
2658
|
+
model: this.provider.label,
|
|
2659
|
+
processingTimeMs: latencyMs,
|
|
2660
|
+
inputTokens: tokenUsage.input_tokens,
|
|
2661
|
+
outputTokens: tokenUsage.output_tokens
|
|
2662
|
+
},
|
|
2663
|
+
_internal: response.data
|
|
2664
|
+
};
|
|
2665
|
+
this.sendTelemetry({
|
|
2666
|
+
status: "success",
|
|
2667
|
+
latencyMs,
|
|
2668
|
+
textLength: text.length,
|
|
2669
|
+
grade: String(gradeNum),
|
|
2670
|
+
provider: this.provider.label,
|
|
2671
|
+
tokenUsage,
|
|
2672
|
+
metadata: { stage_details: stageDetails },
|
|
2673
|
+
inputText: text
|
|
2674
|
+
}).catch(() => void 0);
|
|
2675
|
+
this.logger.info("Purpose evaluation completed successfully", {
|
|
2676
|
+
evaluator: _PurposeEvaluator.metadata.id,
|
|
2677
|
+
operation: "evaluate",
|
|
2678
|
+
grade: gradeNum,
|
|
2679
|
+
score: result.score,
|
|
2680
|
+
processingTimeMs: latencyMs
|
|
2681
|
+
});
|
|
2682
|
+
return result;
|
|
2683
|
+
} catch (error) {
|
|
2684
|
+
const latencyMs = Date.now() - startTime;
|
|
2685
|
+
this.logger.error("Purpose evaluation failed", {
|
|
2686
|
+
evaluator: _PurposeEvaluator.metadata.id,
|
|
2687
|
+
operation: "evaluate",
|
|
2688
|
+
grade,
|
|
2689
|
+
error: error instanceof Error ? error : void 0,
|
|
2690
|
+
processingTimeMs: latencyMs
|
|
2691
|
+
});
|
|
2692
|
+
const tokenUsage = stageDetails.length > 0 ? {
|
|
2693
|
+
input_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.input_tokens ?? 0), 0),
|
|
2694
|
+
output_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.output_tokens ?? 0), 0)
|
|
2695
|
+
} : void 0;
|
|
2696
|
+
this.sendTelemetry({
|
|
2697
|
+
status: "error",
|
|
2698
|
+
latencyMs,
|
|
2699
|
+
textLength: text.length,
|
|
2700
|
+
grade: String(grade),
|
|
2701
|
+
provider: this.provider.label,
|
|
2702
|
+
tokenUsage,
|
|
2703
|
+
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
2704
|
+
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
2705
|
+
inputText: text
|
|
2706
|
+
}).catch(() => void 0);
|
|
2707
|
+
if (error instanceof ValidationError) throw error;
|
|
2708
|
+
throw wrapProviderError(error, "Purpose evaluation failed");
|
|
2709
|
+
}
|
|
2710
|
+
}
|
|
2711
|
+
parseAndValidateGrade(grade) {
|
|
2712
|
+
const num = Number(grade.trim());
|
|
2713
|
+
if (!Number.isInteger(num) || num < GRADE_MIN || num > GRADE_MAX) {
|
|
2714
|
+
throw new ValidationError(
|
|
2715
|
+
`Invalid grade "${grade}". Purpose evaluator supports integer grades ${GRADE_MIN}\u2013${GRADE_MAX}.`
|
|
2716
|
+
);
|
|
2717
|
+
}
|
|
2718
|
+
return num;
|
|
2719
|
+
}
|
|
2720
|
+
async callLLM(inputs) {
|
|
2721
|
+
const response = await this.provider.generateStructured({
|
|
2722
|
+
messages: [
|
|
2723
|
+
{ role: "system", content: getSystemPrompt5(inputs) },
|
|
2724
|
+
{ role: "user", content: getUserPrompt5(inputs) }
|
|
2725
|
+
],
|
|
2726
|
+
schema: PurposeOutputSchema,
|
|
2727
|
+
temperature: _PurposeEvaluator.TEMPERATURE
|
|
2728
|
+
});
|
|
2729
|
+
return { data: response.data, usage: response.usage, latencyMs: response.latencyMs };
|
|
2730
|
+
}
|
|
2731
|
+
};
|
|
2732
|
+
async function evaluatePurpose(text, grade, config) {
|
|
2733
|
+
return new PurposeEvaluator(config).evaluate(text, grade);
|
|
2734
|
+
}
|
|
2735
|
+
|
|
2331
2736
|
exports.APIError = APIError;
|
|
2332
2737
|
exports.AuthenticationError = AuthenticationError;
|
|
2333
2738
|
exports.ComplexityClassificationSchema = ComplexityClassificationSchema;
|
|
@@ -2339,6 +2744,9 @@ exports.GradeLevelAppropriatenessEvaluator = GradeLevelAppropriatenessEvaluator;
|
|
|
2339
2744
|
exports.GradeLevelAppropriatenessSchema = GradeLevelAppropriatenessSchema;
|
|
2340
2745
|
exports.LogLevel = LogLevel;
|
|
2341
2746
|
exports.NetworkError = NetworkError;
|
|
2747
|
+
exports.Provider = Provider;
|
|
2748
|
+
exports.Providers = Providers;
|
|
2749
|
+
exports.PurposeEvaluator = PurposeEvaluator;
|
|
2342
2750
|
exports.RateLimitError = RateLimitError;
|
|
2343
2751
|
exports.SentenceAnalysisSchema = SentenceAnalysisSchema;
|
|
2344
2752
|
exports.SentenceStructureEvaluator = SentenceStructureEvaluator;
|
|
@@ -2353,6 +2761,7 @@ exports.calculateFleschKincaidGrade = calculateFleschKincaidGrade;
|
|
|
2353
2761
|
exports.calculateReadabilityMetrics = calculateReadabilityMetrics;
|
|
2354
2762
|
exports.evaluateConventionality = evaluateConventionality;
|
|
2355
2763
|
exports.evaluateGradeLevelAppropriateness = evaluateGradeLevelAppropriateness;
|
|
2764
|
+
exports.evaluatePurpose = evaluatePurpose;
|
|
2356
2765
|
exports.evaluateSentenceStructure = evaluateSentenceStructure;
|
|
2357
2766
|
exports.evaluateSmk = evaluateSmk;
|
|
2358
2767
|
exports.evaluateTextComplexity = evaluateTextComplexity;
|