@learning-commons/evaluators 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +176 -45
- package/dist/{base-Ced9oKKa.d.cts → base-DKcAYXfb.d.cts} +142 -9
- package/dist/{base-Ced9oKKa.d.ts → base-DKcAYXfb.d.ts} +142 -9
- package/dist/batch/cli.js +597 -211
- package/dist/batch/cli.js.map +1 -1
- package/dist/batch/index.cjs +598 -211
- package/dist/batch/index.cjs.map +1 -1
- package/dist/batch/index.d.cts +2 -1
- package/dist/batch/index.d.ts +2 -1
- package/dist/batch/index.js +597 -211
- package/dist/batch/index.js.map +1 -1
- package/dist/index.cjs +609 -212
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +153 -86
- package/dist/index.d.ts +153 -86
- package/dist/index.js +605 -213
- package/dist/index.js.map +1 -1
- package/package.json +8 -3
- package/src/batch/README.md +1 -1
package/dist/batch/index.cjs
CHANGED
|
@@ -10,6 +10,7 @@ var ai = require('ai');
|
|
|
10
10
|
var zod = require('zod');
|
|
11
11
|
var nlp = require('compromise');
|
|
12
12
|
var syllable = require('syllable');
|
|
13
|
+
var textReadability = require('text-readability');
|
|
13
14
|
var sync = require('csv-parse/sync');
|
|
14
15
|
|
|
15
16
|
var _documentCurrentScript = typeof document !== 'undefined' ? document.currentScript : null;
|
|
@@ -36,6 +37,7 @@ function _interopNamespace(e) {
|
|
|
36
37
|
var pLimit__default = /*#__PURE__*/_interopDefault(pLimit);
|
|
37
38
|
var fs__namespace = /*#__PURE__*/_interopNamespace(fs);
|
|
38
39
|
var nlp__default = /*#__PURE__*/_interopDefault(nlp);
|
|
40
|
+
var textReadability__default = /*#__PURE__*/_interopDefault(textReadability);
|
|
39
41
|
|
|
40
42
|
// src/batch/evaluator.ts
|
|
41
43
|
|
|
@@ -198,8 +200,9 @@ var TimeoutError = class extends APIError {
|
|
|
198
200
|
function parseProviderError(error) {
|
|
199
201
|
if (error instanceof Error) {
|
|
200
202
|
const message = error.message;
|
|
203
|
+
const err = error;
|
|
201
204
|
const statusMatch = message.match(/\b(4\d{2}|5\d{2})\b/);
|
|
202
|
-
const statusCode = statusMatch ? parseInt(statusMatch[1]) : void 0;
|
|
205
|
+
const statusCode = err.statusCode ?? err.status ?? (statusMatch ? parseInt(statusMatch[1]) : void 0);
|
|
203
206
|
return {
|
|
204
207
|
message,
|
|
205
208
|
statusCode,
|
|
@@ -212,6 +215,11 @@ function parseProviderError(error) {
|
|
|
212
215
|
}
|
|
213
216
|
function wrapProviderError(error, defaultMessage = "API request failed") {
|
|
214
217
|
const { message, statusCode, code } = parseProviderError(error);
|
|
218
|
+
if (statusCode === 404 || statusCode === 400 && /\bmodel\b.*(not found|does not exist|invalid)/i.test(message)) {
|
|
219
|
+
return new ConfigurationError(
|
|
220
|
+
`Model not found or invalid: ${message}. Check the model ID passed to the provider.`
|
|
221
|
+
);
|
|
222
|
+
}
|
|
215
223
|
if (statusCode === 401 || statusCode === 403) {
|
|
216
224
|
return new AuthenticationError(
|
|
217
225
|
message.includes("API key") ? message : "Invalid API key",
|
|
@@ -286,6 +294,111 @@ function createLogger(customLogger, level = 2 /* WARN */) {
|
|
|
286
294
|
}
|
|
287
295
|
return new ConsoleLogger(level);
|
|
288
296
|
}
|
|
297
|
+
var VercelAIProvider = class {
|
|
298
|
+
constructor(config) {
|
|
299
|
+
this.config = config;
|
|
300
|
+
if (config.type === "custom") {
|
|
301
|
+
throw new Error(
|
|
302
|
+
"VercelAIProvider does not support custom type. Use config.customProvider directly."
|
|
303
|
+
);
|
|
304
|
+
}
|
|
305
|
+
if (!config.model || config.model.trim() === "") {
|
|
306
|
+
throw new Error(
|
|
307
|
+
`model is required for VercelAIProvider (type: "${config.type}"). No default is assumed.`
|
|
308
|
+
);
|
|
309
|
+
}
|
|
310
|
+
this.model = config.model;
|
|
311
|
+
this.label = `${config.type}:${config.model}`;
|
|
312
|
+
}
|
|
313
|
+
label;
|
|
314
|
+
model;
|
|
315
|
+
/**
|
|
316
|
+
* Generate structured output using Vercel AI SDK's generateText with output
|
|
317
|
+
*/
|
|
318
|
+
async generateStructured(request) {
|
|
319
|
+
const model = await this.getModel();
|
|
320
|
+
const startTime = Date.now();
|
|
321
|
+
const { output, usage } = await ai.generateText({
|
|
322
|
+
model,
|
|
323
|
+
messages: request.messages,
|
|
324
|
+
output: ai.Output.object({ schema: request.schema }),
|
|
325
|
+
temperature: request.temperature ?? 0,
|
|
326
|
+
maxRetries: this.config.maxRetries ?? 0,
|
|
327
|
+
...request.maxTokens !== void 0 ? { maxTokens: request.maxTokens } : {}
|
|
328
|
+
});
|
|
329
|
+
return {
|
|
330
|
+
data: output,
|
|
331
|
+
model: this.model,
|
|
332
|
+
usage: {
|
|
333
|
+
inputTokens: usage.inputTokens || 0,
|
|
334
|
+
outputTokens: usage.outputTokens || 0
|
|
335
|
+
},
|
|
336
|
+
latencyMs: Date.now() - startTime
|
|
337
|
+
};
|
|
338
|
+
}
|
|
339
|
+
/**
|
|
340
|
+
* Generate plain text using Vercel AI SDK's generateText
|
|
341
|
+
*/
|
|
342
|
+
async generateText(messages, temperature) {
|
|
343
|
+
const model = await this.getModel();
|
|
344
|
+
const startTime = Date.now();
|
|
345
|
+
const { text, usage } = await ai.generateText({
|
|
346
|
+
model,
|
|
347
|
+
messages,
|
|
348
|
+
temperature: temperature ?? this.config.temperature ?? 0,
|
|
349
|
+
maxRetries: this.config.maxRetries ?? 0
|
|
350
|
+
});
|
|
351
|
+
return {
|
|
352
|
+
text,
|
|
353
|
+
usage: {
|
|
354
|
+
inputTokens: usage.inputTokens || 0,
|
|
355
|
+
outputTokens: usage.outputTokens || 0
|
|
356
|
+
},
|
|
357
|
+
latencyMs: Date.now() - startTime
|
|
358
|
+
};
|
|
359
|
+
}
|
|
360
|
+
/**
|
|
361
|
+
* Get the configured language model.
|
|
362
|
+
* Uses dynamic imports so consumers only need to install the provider packages they use.
|
|
363
|
+
*/
|
|
364
|
+
async getModel() {
|
|
365
|
+
const apiKey = this.config.apiKey;
|
|
366
|
+
switch (this.config.type) {
|
|
367
|
+
case "openai": {
|
|
368
|
+
const { createOpenAI } = await import('@ai-sdk/openai').catch(() => {
|
|
369
|
+
throw new Error(
|
|
370
|
+
"To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai"
|
|
371
|
+
);
|
|
372
|
+
});
|
|
373
|
+
return createOpenAI(apiKey ? { apiKey } : {})(this.model);
|
|
374
|
+
}
|
|
375
|
+
case "anthropic": {
|
|
376
|
+
const { createAnthropic } = await import('@ai-sdk/anthropic').catch(() => {
|
|
377
|
+
throw new Error(
|
|
378
|
+
"To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic"
|
|
379
|
+
);
|
|
380
|
+
});
|
|
381
|
+
return createAnthropic(apiKey ? { apiKey } : {})(this.model);
|
|
382
|
+
}
|
|
383
|
+
case "google": {
|
|
384
|
+
const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch(() => {
|
|
385
|
+
throw new Error(
|
|
386
|
+
"To use the Google provider, install its adapter: npm install @ai-sdk/google"
|
|
387
|
+
);
|
|
388
|
+
});
|
|
389
|
+
return createGoogleGenerativeAI(apiKey ? { apiKey } : {})(this.model);
|
|
390
|
+
}
|
|
391
|
+
default:
|
|
392
|
+
throw new Error(`Unsupported provider type: ${this.config.type}`);
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
};
|
|
396
|
+
function createProvider(config) {
|
|
397
|
+
if (config.type === "custom" && config.customProvider) {
|
|
398
|
+
return config.customProvider;
|
|
399
|
+
}
|
|
400
|
+
return new VercelAIProvider(config);
|
|
401
|
+
}
|
|
289
402
|
|
|
290
403
|
// src/evaluators/base.ts
|
|
291
404
|
var VALIDATION_LIMITS = {
|
|
@@ -294,6 +407,12 @@ var VALIDATION_LIMITS = {
|
|
|
294
407
|
/** Maximum text length in characters (100K chars ≈ 25K tokens) */
|
|
295
408
|
MAX_TEXT_LENGTH: 1e5
|
|
296
409
|
};
|
|
410
|
+
var Provider = /* @__PURE__ */ ((Provider2) => {
|
|
411
|
+
Provider2["OpenAI"] = "openai";
|
|
412
|
+
Provider2["Google"] = "google";
|
|
413
|
+
Provider2["Anthropic"] = "anthropic";
|
|
414
|
+
return Provider2;
|
|
415
|
+
})(Provider || {});
|
|
297
416
|
var BaseEvaluator = class {
|
|
298
417
|
telemetryClient;
|
|
299
418
|
logger;
|
|
@@ -311,21 +430,35 @@ var BaseEvaluator = class {
|
|
|
311
430
|
* name: 'My Evaluator',
|
|
312
431
|
* description: 'Does something useful',
|
|
313
432
|
* supportedGrades: ['3', '4', '5'],
|
|
314
|
-
*
|
|
315
|
-
* requiresOpenAIKey: false,
|
|
433
|
+
* defaultProviders: [Provider.Google],
|
|
316
434
|
* };
|
|
317
435
|
* }
|
|
318
436
|
* ```
|
|
319
437
|
*/
|
|
320
438
|
static metadata;
|
|
439
|
+
/**
|
|
440
|
+
* @throws {ConfigurationError} If the subclass has not defined static metadata
|
|
441
|
+
* @throws {ConfigurationError} If modelOverride has an invalid provider or empty model
|
|
442
|
+
* @throws {ConfigurationError} If a required API key is missing
|
|
443
|
+
*/
|
|
321
444
|
constructor(config) {
|
|
322
445
|
this.logger = createLogger(config.logger, config.logLevel ?? 2 /* WARN */);
|
|
446
|
+
this.validateModelOverride(config);
|
|
323
447
|
this.validateApiKeys(config);
|
|
324
448
|
const telemetryConfig = this.normalizeTelemetryConfig(config.telemetry);
|
|
325
449
|
this.config = {
|
|
326
450
|
maxRetries: config.maxRetries ?? 2,
|
|
327
|
-
telemetry: telemetryConfig
|
|
451
|
+
telemetry: telemetryConfig,
|
|
452
|
+
modelOverride: config.modelOverride,
|
|
453
|
+
googleApiKey: config.googleApiKey,
|
|
454
|
+
openaiApiKey: config.openaiApiKey,
|
|
455
|
+
anthropicApiKey: config.anthropicApiKey
|
|
328
456
|
};
|
|
457
|
+
if (config.modelOverride) {
|
|
458
|
+
this.logger.warn(
|
|
459
|
+
`modelOverride is active: using ${config.modelOverride.provider}:${config.modelOverride.model} instead of the default model. Evaluation quality may differ from recommended defaults.`
|
|
460
|
+
);
|
|
461
|
+
}
|
|
329
462
|
if (this.config.telemetry.enabled) {
|
|
330
463
|
this.telemetryClient = new TelemetryClient({
|
|
331
464
|
endpoint: "https://api.learningcommons.org/evaluators-telemetry/v1/events",
|
|
@@ -350,21 +483,62 @@ var BaseEvaluator = class {
|
|
|
350
483
|
return meta;
|
|
351
484
|
}
|
|
352
485
|
/**
|
|
353
|
-
* Validate
|
|
354
|
-
*
|
|
486
|
+
* Validate modelOverride shape: provider must be a known Provider value and
|
|
487
|
+
* model must be a non-empty string.
|
|
488
|
+
* @throws {ConfigurationError} If the override is malformed
|
|
355
489
|
*/
|
|
356
|
-
|
|
357
|
-
if (
|
|
490
|
+
validateModelOverride(config) {
|
|
491
|
+
if (!config.modelOverride) return;
|
|
492
|
+
const validProviders = Object.values(Provider);
|
|
493
|
+
if (!validProviders.includes(config.modelOverride.provider)) {
|
|
358
494
|
throw new ConfigurationError(
|
|
359
|
-
`
|
|
495
|
+
`Invalid provider "${config.modelOverride.provider}" in modelOverride. Valid providers are: ${validProviders.join(", ")}.`
|
|
360
496
|
);
|
|
361
497
|
}
|
|
362
|
-
if (
|
|
498
|
+
if (!config.modelOverride.model || config.modelOverride.model.trim() === "") {
|
|
363
499
|
throw new ConfigurationError(
|
|
364
|
-
`
|
|
500
|
+
`modelOverride.model is required. Specify the model ID for provider "${config.modelOverride.provider}".`
|
|
365
501
|
);
|
|
366
502
|
}
|
|
367
503
|
}
|
|
504
|
+
/**
|
|
505
|
+
* Validate that the required API key is present.
|
|
506
|
+
* When modelOverride is set, checks the override provider's key.
|
|
507
|
+
* Otherwise checks the keys required by the evaluator's default providers.
|
|
508
|
+
* @throws {ConfigurationError} If a required key is missing
|
|
509
|
+
*/
|
|
510
|
+
validateApiKeys(config) {
|
|
511
|
+
const keyFor = {
|
|
512
|
+
["openai" /* OpenAI */]: config.openaiApiKey?.trim() || void 0,
|
|
513
|
+
["google" /* Google */]: config.googleApiKey?.trim() || void 0,
|
|
514
|
+
["anthropic" /* Anthropic */]: config.anthropicApiKey?.trim() || void 0
|
|
515
|
+
};
|
|
516
|
+
const humanName = {
|
|
517
|
+
["openai" /* OpenAI */]: "OpenAI API key",
|
|
518
|
+
["google" /* Google */]: "Google API key",
|
|
519
|
+
["anthropic" /* Anthropic */]: "Anthropic API key"
|
|
520
|
+
};
|
|
521
|
+
const configKey = {
|
|
522
|
+
["openai" /* OpenAI */]: "openaiApiKey",
|
|
523
|
+
["google" /* Google */]: "googleApiKey",
|
|
524
|
+
["anthropic" /* Anthropic */]: "anthropicApiKey"
|
|
525
|
+
};
|
|
526
|
+
if (config.modelOverride) {
|
|
527
|
+
if (!keyFor[config.modelOverride.provider]) {
|
|
528
|
+
throw new ConfigurationError(
|
|
529
|
+
`${humanName[config.modelOverride.provider]} is required when using modelOverride with provider "${config.modelOverride.provider}". Pass ${configKey[config.modelOverride.provider]} in config.`
|
|
530
|
+
);
|
|
531
|
+
}
|
|
532
|
+
return;
|
|
533
|
+
}
|
|
534
|
+
for (const provider of this.metadata.defaultProviders) {
|
|
535
|
+
if (!keyFor[provider]) {
|
|
536
|
+
throw new ConfigurationError(
|
|
537
|
+
`${humanName[provider]} is required for ${this.metadata.name} evaluator. Pass ${configKey[provider]} in config.`
|
|
538
|
+
);
|
|
539
|
+
}
|
|
540
|
+
}
|
|
541
|
+
}
|
|
368
542
|
/**
|
|
369
543
|
* Normalize telemetry config to standard format
|
|
370
544
|
*/
|
|
@@ -445,6 +619,33 @@ var BaseEvaluator = class {
|
|
|
445
619
|
);
|
|
446
620
|
}
|
|
447
621
|
}
|
|
622
|
+
/**
|
|
623
|
+
* Create an LLM provider, honouring modelOverride if set.
|
|
624
|
+
* When override is active, the key for the override provider is resolved
|
|
625
|
+
* from the matching top-level config field (e.g. anthropicApiKey for Anthropic).
|
|
626
|
+
*/
|
|
627
|
+
createConfiguredProvider(defaultType, defaultModel, defaultApiKey) {
|
|
628
|
+
const override = this.config.modelOverride;
|
|
629
|
+
if (override) {
|
|
630
|
+
const apiKeyFor = {
|
|
631
|
+
["openai" /* OpenAI */]: this.config.openaiApiKey,
|
|
632
|
+
["google" /* Google */]: this.config.googleApiKey,
|
|
633
|
+
["anthropic" /* Anthropic */]: this.config.anthropicApiKey
|
|
634
|
+
};
|
|
635
|
+
return createProvider({
|
|
636
|
+
type: override.provider,
|
|
637
|
+
model: override.model,
|
|
638
|
+
apiKey: apiKeyFor[override.provider],
|
|
639
|
+
maxRetries: this.config.maxRetries
|
|
640
|
+
});
|
|
641
|
+
}
|
|
642
|
+
return createProvider({
|
|
643
|
+
type: defaultType,
|
|
644
|
+
model: defaultModel,
|
|
645
|
+
apiKey: defaultApiKey,
|
|
646
|
+
maxRetries: this.config.maxRetries
|
|
647
|
+
});
|
|
648
|
+
}
|
|
448
649
|
/**
|
|
449
650
|
* Send telemetry event to analytics service
|
|
450
651
|
* Common helper for all evaluators
|
|
@@ -465,123 +666,12 @@ var BaseEvaluator = class {
|
|
|
465
666
|
provider: params.provider,
|
|
466
667
|
token_usage: params.tokenUsage,
|
|
467
668
|
metadata: params.metadata,
|
|
669
|
+
model_override: this.config.modelOverride ? true : void 0,
|
|
468
670
|
// Include input text only if recording is enabled
|
|
469
671
|
input_text: this.config.telemetry.recordInputs ? params.inputText : void 0
|
|
470
672
|
});
|
|
471
673
|
}
|
|
472
674
|
};
|
|
473
|
-
var DEFAULT_MODELS = {
|
|
474
|
-
openai: "gpt-4o",
|
|
475
|
-
anthropic: "claude-sonnet-4-5-20250929",
|
|
476
|
-
google: "gemini-2.5-pro"
|
|
477
|
-
};
|
|
478
|
-
var VercelAIProvider = class {
|
|
479
|
-
constructor(config) {
|
|
480
|
-
this.config = config;
|
|
481
|
-
if (config.type === "custom") {
|
|
482
|
-
throw new Error(
|
|
483
|
-
"VercelAIProvider does not support custom type. Use config.customProvider directly."
|
|
484
|
-
);
|
|
485
|
-
}
|
|
486
|
-
}
|
|
487
|
-
/**
|
|
488
|
-
* Generate structured output using Vercel AI SDK's generateText with output
|
|
489
|
-
*/
|
|
490
|
-
async generateStructured(request) {
|
|
491
|
-
const model = await this.getModel(request.model);
|
|
492
|
-
const startTime = Date.now();
|
|
493
|
-
const { output, usage } = await ai.generateText({
|
|
494
|
-
model,
|
|
495
|
-
messages: request.messages,
|
|
496
|
-
output: ai.Output.object({ schema: request.schema }),
|
|
497
|
-
temperature: request.temperature ?? 0,
|
|
498
|
-
maxRetries: this.config.maxRetries ?? 0,
|
|
499
|
-
...request.maxTokens !== void 0 ? { maxTokens: request.maxTokens } : {}
|
|
500
|
-
});
|
|
501
|
-
return {
|
|
502
|
-
data: output,
|
|
503
|
-
model: request.model || this.getDefaultModel(),
|
|
504
|
-
usage: {
|
|
505
|
-
inputTokens: usage.inputTokens || 0,
|
|
506
|
-
outputTokens: usage.outputTokens || 0
|
|
507
|
-
},
|
|
508
|
-
latencyMs: Date.now() - startTime
|
|
509
|
-
};
|
|
510
|
-
}
|
|
511
|
-
/**
|
|
512
|
-
* Generate plain text using Vercel AI SDK's generateText
|
|
513
|
-
*/
|
|
514
|
-
async generateText(messages, temperature) {
|
|
515
|
-
const model = await this.getModel();
|
|
516
|
-
const startTime = Date.now();
|
|
517
|
-
const { text, usage } = await ai.generateText({
|
|
518
|
-
model,
|
|
519
|
-
messages,
|
|
520
|
-
temperature: temperature ?? this.config.temperature ?? 0,
|
|
521
|
-
maxRetries: this.config.maxRetries ?? 0
|
|
522
|
-
});
|
|
523
|
-
return {
|
|
524
|
-
text,
|
|
525
|
-
usage: {
|
|
526
|
-
inputTokens: usage.inputTokens || 0,
|
|
527
|
-
outputTokens: usage.outputTokens || 0
|
|
528
|
-
},
|
|
529
|
-
latencyMs: Date.now() - startTime
|
|
530
|
-
};
|
|
531
|
-
}
|
|
532
|
-
/**
|
|
533
|
-
* Get the configured language model.
|
|
534
|
-
* Uses dynamic imports so consumers only need to install the provider packages they use.
|
|
535
|
-
*/
|
|
536
|
-
async getModel(requestModel) {
|
|
537
|
-
const modelId = requestModel || this.config.model || this.getDefaultModel();
|
|
538
|
-
const apiKey = this.config.apiKey;
|
|
539
|
-
switch (this.config.type) {
|
|
540
|
-
case "openai": {
|
|
541
|
-
const { createOpenAI } = await import('@ai-sdk/openai').catch(() => {
|
|
542
|
-
throw new Error(
|
|
543
|
-
"To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai"
|
|
544
|
-
);
|
|
545
|
-
});
|
|
546
|
-
return createOpenAI(apiKey ? { apiKey } : {})(modelId);
|
|
547
|
-
}
|
|
548
|
-
case "anthropic": {
|
|
549
|
-
const { createAnthropic } = await import('@ai-sdk/anthropic').catch(() => {
|
|
550
|
-
throw new Error(
|
|
551
|
-
"To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic"
|
|
552
|
-
);
|
|
553
|
-
});
|
|
554
|
-
return createAnthropic(apiKey ? { apiKey } : {})(modelId);
|
|
555
|
-
}
|
|
556
|
-
case "google": {
|
|
557
|
-
const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch(() => {
|
|
558
|
-
throw new Error(
|
|
559
|
-
"To use the Google provider, install its adapter: npm install @ai-sdk/google"
|
|
560
|
-
);
|
|
561
|
-
});
|
|
562
|
-
return createGoogleGenerativeAI(apiKey ? { apiKey } : {})(modelId);
|
|
563
|
-
}
|
|
564
|
-
default:
|
|
565
|
-
throw new Error(`Unsupported provider type: ${this.config.type}`);
|
|
566
|
-
}
|
|
567
|
-
}
|
|
568
|
-
/**
|
|
569
|
-
* Get default model for the configured provider
|
|
570
|
-
*/
|
|
571
|
-
getDefaultModel() {
|
|
572
|
-
const providerType = this.config.type;
|
|
573
|
-
if (providerType === "custom") {
|
|
574
|
-
throw new Error("Cannot get default model for custom provider type");
|
|
575
|
-
}
|
|
576
|
-
return DEFAULT_MODELS[providerType];
|
|
577
|
-
}
|
|
578
|
-
};
|
|
579
|
-
function createProvider(config) {
|
|
580
|
-
if (config.type === "custom" && config.customProvider) {
|
|
581
|
-
return config.customProvider;
|
|
582
|
-
}
|
|
583
|
-
return new VercelAIProvider(config);
|
|
584
|
-
}
|
|
585
675
|
var TextComplexityLevel = zod.z.enum([
|
|
586
676
|
"Slightly complex",
|
|
587
677
|
"Moderately complex",
|
|
@@ -783,6 +873,44 @@ function featuresToJSON(features, decimals = 1, castToInt = true) {
|
|
|
783
873
|
}
|
|
784
874
|
return JSON.stringify(payload, null, 2);
|
|
785
875
|
}
|
|
876
|
+
var LIBRARY_ADAPTERS = {
|
|
877
|
+
"text-readability": {
|
|
878
|
+
call(fnName, text) {
|
|
879
|
+
const fn = textReadability__default.default[fnName];
|
|
880
|
+
if (typeof fn !== "function") {
|
|
881
|
+
throw new Error(`Function "${fnName}" not found in text-readability.`);
|
|
882
|
+
}
|
|
883
|
+
return fn.call(textReadability__default.default, text);
|
|
884
|
+
}
|
|
885
|
+
}
|
|
886
|
+
};
|
|
887
|
+
var POST_TRANSFORMS = {
|
|
888
|
+
round(value, { precision = 0 }) {
|
|
889
|
+
const factor = 10 ** precision;
|
|
890
|
+
return Math.round(value * factor) / factor;
|
|
891
|
+
}
|
|
892
|
+
};
|
|
893
|
+
function runPreprocessingStep(text, impl) {
|
|
894
|
+
const adapter = LIBRARY_ADAPTERS[impl.library];
|
|
895
|
+
if (!adapter) {
|
|
896
|
+
const supported = Object.keys(LIBRARY_ADAPTERS).join(", ");
|
|
897
|
+
throw new Error(
|
|
898
|
+
`Unsupported preprocessing library "${impl.library}". Supported: ${supported}.`
|
|
899
|
+
);
|
|
900
|
+
}
|
|
901
|
+
let result = adapter.call(impl.function, text);
|
|
902
|
+
if (impl.post_transform) {
|
|
903
|
+
const transform = POST_TRANSFORMS[impl.post_transform.type];
|
|
904
|
+
if (!transform) {
|
|
905
|
+
const supported = Object.keys(POST_TRANSFORMS).join(", ");
|
|
906
|
+
throw new Error(
|
|
907
|
+
`Unsupported post_transform type "${impl.post_transform.type}". Supported: ${supported}.`
|
|
908
|
+
);
|
|
909
|
+
}
|
|
910
|
+
result = transform(result, impl.post_transform);
|
|
911
|
+
}
|
|
912
|
+
return result;
|
|
913
|
+
}
|
|
786
914
|
|
|
787
915
|
// ../../evals/prompts/vocabulary/background-knowledge.txt
|
|
788
916
|
var background_knowledge_default = `
|
|
@@ -1088,32 +1216,28 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1088
1216
|
name: "Vocabulary",
|
|
1089
1217
|
description: "Evaluates vocabulary complexity of educational texts relative to grade level",
|
|
1090
1218
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
1091
|
-
|
|
1092
|
-
requiresOpenAIKey: true
|
|
1219
|
+
defaultProviders: ["google" /* Google */, "openai" /* OpenAI */]
|
|
1093
1220
|
};
|
|
1094
1221
|
grades34ComplexityProvider;
|
|
1095
1222
|
otherGradesComplexityProvider;
|
|
1096
1223
|
backgroundKnowledgeProvider;
|
|
1097
1224
|
constructor(config) {
|
|
1098
1225
|
super(config);
|
|
1099
|
-
this.grades34ComplexityProvider =
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
apiKey: config.openaiApiKey,
|
|
1115
|
-
maxRetries: this.config.maxRetries
|
|
1116
|
-
});
|
|
1226
|
+
this.grades34ComplexityProvider = this.createConfiguredProvider(
|
|
1227
|
+
"google" /* Google */,
|
|
1228
|
+
"gemini-2.5-pro",
|
|
1229
|
+
config.googleApiKey
|
|
1230
|
+
);
|
|
1231
|
+
this.otherGradesComplexityProvider = this.createConfiguredProvider(
|
|
1232
|
+
"openai" /* OpenAI */,
|
|
1233
|
+
"gpt-4.1-2025-04-14",
|
|
1234
|
+
config.openaiApiKey
|
|
1235
|
+
);
|
|
1236
|
+
this.backgroundKnowledgeProvider = this.createConfiguredProvider(
|
|
1237
|
+
"openai" /* OpenAI */,
|
|
1238
|
+
"gpt-4o-2024-11-20",
|
|
1239
|
+
config.openaiApiKey
|
|
1240
|
+
);
|
|
1117
1241
|
}
|
|
1118
1242
|
/**
|
|
1119
1243
|
* Evaluate vocabulary complexity for a given text and grade level
|
|
@@ -1122,6 +1246,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1122
1246
|
* @param grade - The target grade level (3-12)
|
|
1123
1247
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
1124
1248
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
1249
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1125
1250
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1126
1251
|
*/
|
|
1127
1252
|
async evaluate(text, grade) {
|
|
@@ -1133,7 +1258,9 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1133
1258
|
});
|
|
1134
1259
|
const startTime = Date.now();
|
|
1135
1260
|
const stageDetails = [];
|
|
1136
|
-
const
|
|
1261
|
+
const complexityProviderLabel = grade === "3" || grade === "4" ? this.grades34ComplexityProvider.label : this.otherGradesComplexityProvider.label;
|
|
1262
|
+
const backgroundProviderLabel = this.backgroundKnowledgeProvider.label;
|
|
1263
|
+
const modelLabel = this.config.modelOverride ? backgroundProviderLabel : `${backgroundProviderLabel}+${complexityProviderLabel}`;
|
|
1137
1264
|
try {
|
|
1138
1265
|
this.validateText(text);
|
|
1139
1266
|
this.validateGrade(grade, new Set(_VocabularyEvaluator.metadata.supportedGrades));
|
|
@@ -1144,7 +1271,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1144
1271
|
const bgResponse = await this.getBackgroundKnowledgeAssumption(text, grade);
|
|
1145
1272
|
stageDetails.push({
|
|
1146
1273
|
stage: "background_knowledge",
|
|
1147
|
-
provider:
|
|
1274
|
+
provider: backgroundProviderLabel,
|
|
1148
1275
|
latency_ms: bgResponse.latencyMs,
|
|
1149
1276
|
token_usage: {
|
|
1150
1277
|
input_tokens: bgResponse.usage.inputTokens,
|
|
@@ -1160,7 +1287,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1160
1287
|
);
|
|
1161
1288
|
stageDetails.push({
|
|
1162
1289
|
stage: "complexity_evaluation",
|
|
1163
|
-
provider:
|
|
1290
|
+
provider: complexityProviderLabel,
|
|
1164
1291
|
latency_ms: complexityResponse.latencyMs,
|
|
1165
1292
|
token_usage: {
|
|
1166
1293
|
input_tokens: complexityResponse.usage.inputTokens,
|
|
@@ -1176,7 +1303,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1176
1303
|
score: complexityResponse.data.complexity_score,
|
|
1177
1304
|
reasoning: complexityResponse.data.reasoning,
|
|
1178
1305
|
metadata: {
|
|
1179
|
-
model:
|
|
1306
|
+
model: modelLabel,
|
|
1180
1307
|
processingTimeMs: latencyMs
|
|
1181
1308
|
},
|
|
1182
1309
|
_internal: complexityResponse.data
|
|
@@ -1186,7 +1313,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1186
1313
|
latencyMs,
|
|
1187
1314
|
textLength: text.length,
|
|
1188
1315
|
grade,
|
|
1189
|
-
provider:
|
|
1316
|
+
provider: modelLabel,
|
|
1190
1317
|
tokenUsage: totalTokenUsage,
|
|
1191
1318
|
metadata: {
|
|
1192
1319
|
stage_details: stageDetails
|
|
@@ -1221,7 +1348,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1221
1348
|
latencyMs,
|
|
1222
1349
|
textLength: text.length,
|
|
1223
1350
|
grade,
|
|
1224
|
-
provider:
|
|
1351
|
+
provider: modelLabel,
|
|
1225
1352
|
tokenUsage: totalTokenUsage,
|
|
1226
1353
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1227
1354
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -1439,25 +1566,12 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1439
1566
|
name: "Sentence Structure",
|
|
1440
1567
|
description: "Evaluates sentence structure complexity based on grammatical features",
|
|
1441
1568
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
1442
|
-
|
|
1443
|
-
requiresOpenAIKey: true
|
|
1569
|
+
defaultProviders: ["openai" /* OpenAI */]
|
|
1444
1570
|
};
|
|
1445
|
-
|
|
1446
|
-
complexityProvider;
|
|
1571
|
+
provider;
|
|
1447
1572
|
constructor(config) {
|
|
1448
1573
|
super(config);
|
|
1449
|
-
this.
|
|
1450
|
-
type: "openai",
|
|
1451
|
-
model: "gpt-4o",
|
|
1452
|
-
apiKey: config.openaiApiKey,
|
|
1453
|
-
maxRetries: this.config.maxRetries
|
|
1454
|
-
});
|
|
1455
|
-
this.complexityProvider = createProvider({
|
|
1456
|
-
type: "openai",
|
|
1457
|
-
model: "gpt-4o",
|
|
1458
|
-
apiKey: config.openaiApiKey,
|
|
1459
|
-
maxRetries: this.config.maxRetries
|
|
1460
|
-
});
|
|
1574
|
+
this.provider = this.createConfiguredProvider("openai" /* OpenAI */, "gpt-4o", config.openaiApiKey);
|
|
1461
1575
|
}
|
|
1462
1576
|
/**
|
|
1463
1577
|
* Evaluate sentence structure complexity for a given text and grade level
|
|
@@ -1466,6 +1580,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1466
1580
|
* @param grade - The target grade level (3-12)
|
|
1467
1581
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
1468
1582
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
1583
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1469
1584
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1470
1585
|
*/
|
|
1471
1586
|
async evaluate(text, grade) {
|
|
@@ -1487,7 +1602,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1487
1602
|
const analysisResponse = await this.analyzeSentenceStructure(text);
|
|
1488
1603
|
stageDetails.push({
|
|
1489
1604
|
stage: "sentence_analysis",
|
|
1490
|
-
provider:
|
|
1605
|
+
provider: this.provider.label,
|
|
1491
1606
|
latency_ms: analysisResponse.latencyMs,
|
|
1492
1607
|
token_usage: {
|
|
1493
1608
|
input_tokens: analysisResponse.usage.inputTokens,
|
|
@@ -1502,7 +1617,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1502
1617
|
const complexityResponse = await this.classifyComplexity(features, grade, text);
|
|
1503
1618
|
stageDetails.push({
|
|
1504
1619
|
stage: "complexity_classification",
|
|
1505
|
-
provider:
|
|
1620
|
+
provider: this.provider.label,
|
|
1506
1621
|
latency_ms: complexityResponse.latencyMs,
|
|
1507
1622
|
token_usage: {
|
|
1508
1623
|
input_tokens: complexityResponse.usage.inputTokens,
|
|
@@ -1518,7 +1633,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1518
1633
|
score: complexityResponse.data.answer,
|
|
1519
1634
|
reasoning: complexityResponse.data.reasoning,
|
|
1520
1635
|
metadata: {
|
|
1521
|
-
model:
|
|
1636
|
+
model: this.provider.label,
|
|
1522
1637
|
processingTimeMs: latencyMs
|
|
1523
1638
|
},
|
|
1524
1639
|
_internal: {
|
|
@@ -1532,7 +1647,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1532
1647
|
latencyMs,
|
|
1533
1648
|
textLength: text.length,
|
|
1534
1649
|
grade,
|
|
1535
|
-
provider:
|
|
1650
|
+
provider: this.provider.label,
|
|
1536
1651
|
tokenUsage: totalTokenUsage,
|
|
1537
1652
|
metadata: {
|
|
1538
1653
|
stage_details: stageDetails
|
|
@@ -1567,7 +1682,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1567
1682
|
latencyMs,
|
|
1568
1683
|
textLength: text.length,
|
|
1569
1684
|
grade,
|
|
1570
|
-
provider:
|
|
1685
|
+
provider: this.provider.label,
|
|
1571
1686
|
tokenUsage: totalTokenUsage,
|
|
1572
1687
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1573
1688
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -1595,7 +1710,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1595
1710
|
`flesch_kincaid_grade: ${metrics.fleschKincaidGrade}`
|
|
1596
1711
|
].join("\n");
|
|
1597
1712
|
const userPrompt = getUserPromptAnalysis(text, gtCountsStr);
|
|
1598
|
-
const response = await this.
|
|
1713
|
+
const response = await this.provider.generateStructured({
|
|
1599
1714
|
messages: [
|
|
1600
1715
|
{ role: "system", content: getSystemPromptAnalysis() },
|
|
1601
1716
|
{ role: "user", content: userPrompt }
|
|
@@ -1617,7 +1732,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1617
1732
|
async classifyComplexity(features, grade, excerpt) {
|
|
1618
1733
|
const featuresJSON = featuresToJSON(features, 1, true);
|
|
1619
1734
|
const userPrompt = getUserPromptComplexity(featuresJSON, grade, excerpt);
|
|
1620
|
-
const response = await this.
|
|
1735
|
+
const response = await this.provider.generateStructured({
|
|
1621
1736
|
messages: [
|
|
1622
1737
|
{ role: "system", content: getSystemPromptComplexity() },
|
|
1623
1738
|
{ role: "user", content: userPrompt }
|
|
@@ -1673,18 +1788,16 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1673
1788
|
description: "Determines appropriate grade level for text with scaffolding recommendations",
|
|
1674
1789
|
supportedGrades: [],
|
|
1675
1790
|
// No grade parameter required - evaluates what grade the text is appropriate for
|
|
1676
|
-
|
|
1677
|
-
requiresOpenAIKey: false
|
|
1791
|
+
defaultProviders: ["google" /* Google */]
|
|
1678
1792
|
};
|
|
1679
1793
|
provider;
|
|
1680
1794
|
constructor(config) {
|
|
1681
1795
|
super(config);
|
|
1682
|
-
this.provider =
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
});
|
|
1796
|
+
this.provider = this.createConfiguredProvider(
|
|
1797
|
+
"google" /* Google */,
|
|
1798
|
+
"gemini-2.5-pro",
|
|
1799
|
+
config.googleApiKey
|
|
1800
|
+
);
|
|
1688
1801
|
}
|
|
1689
1802
|
/**
|
|
1690
1803
|
* Evaluate grade level appropriateness for a given text
|
|
@@ -1692,6 +1805,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1692
1805
|
* @param text - The text to evaluate
|
|
1693
1806
|
* @returns Evaluation result with grade recommendations and scaffolding suggestions
|
|
1694
1807
|
* @throws {ValidationError} If text is empty or too short/long
|
|
1808
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1695
1809
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1696
1810
|
*/
|
|
1697
1811
|
async evaluate(text) {
|
|
@@ -1725,7 +1839,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1725
1839
|
score: response.data.grade,
|
|
1726
1840
|
reasoning: response.data.reasoning,
|
|
1727
1841
|
metadata: {
|
|
1728
|
-
model:
|
|
1842
|
+
model: this.provider.label,
|
|
1729
1843
|
processingTimeMs: latencyMs
|
|
1730
1844
|
},
|
|
1731
1845
|
_internal: response.data
|
|
@@ -1734,7 +1848,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1734
1848
|
status: "success",
|
|
1735
1849
|
latencyMs,
|
|
1736
1850
|
textLength: text.length,
|
|
1737
|
-
provider:
|
|
1851
|
+
provider: this.provider.label,
|
|
1738
1852
|
tokenUsage,
|
|
1739
1853
|
// No metadata.stage_details for single-stage evaluator
|
|
1740
1854
|
inputText: text
|
|
@@ -1759,7 +1873,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1759
1873
|
status: "error",
|
|
1760
1874
|
latencyMs,
|
|
1761
1875
|
textLength: text.length,
|
|
1762
|
-
provider:
|
|
1876
|
+
provider: this.provider.label,
|
|
1763
1877
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1764
1878
|
inputText: text
|
|
1765
1879
|
}).catch(() => {
|
|
@@ -1870,18 +1984,16 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1870
1984
|
name: "Subject Matter Knowledge",
|
|
1871
1985
|
description: "Evaluates background knowledge demands of educational texts relative to grade level",
|
|
1872
1986
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
1873
|
-
|
|
1874
|
-
requiresOpenAIKey: false
|
|
1987
|
+
defaultProviders: ["google" /* Google */]
|
|
1875
1988
|
};
|
|
1876
1989
|
provider;
|
|
1877
1990
|
constructor(config) {
|
|
1878
1991
|
super(config);
|
|
1879
|
-
this.provider =
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
1884
|
-
});
|
|
1992
|
+
this.provider = this.createConfiguredProvider(
|
|
1993
|
+
"google" /* Google */,
|
|
1994
|
+
"gemini-3-flash-preview",
|
|
1995
|
+
config.googleApiKey
|
|
1996
|
+
);
|
|
1885
1997
|
}
|
|
1886
1998
|
/**
|
|
1887
1999
|
* Evaluate subject matter knowledge complexity for a given text and grade level
|
|
@@ -1890,6 +2002,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1890
2002
|
* @param grade - The target grade level (3-12)
|
|
1891
2003
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
1892
2004
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
2005
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1893
2006
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1894
2007
|
*/
|
|
1895
2008
|
async evaluate(text, grade) {
|
|
@@ -1912,7 +2025,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1912
2025
|
const response = await this.evaluateSmk(text, grade, fkScore);
|
|
1913
2026
|
stageDetails.push({
|
|
1914
2027
|
stage: "smk_evaluation",
|
|
1915
|
-
provider:
|
|
2028
|
+
provider: this.provider.label,
|
|
1916
2029
|
latency_ms: response.latencyMs,
|
|
1917
2030
|
token_usage: {
|
|
1918
2031
|
input_tokens: response.usage.inputTokens,
|
|
@@ -1928,7 +2041,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1928
2041
|
score: response.data.complexity_score,
|
|
1929
2042
|
reasoning: response.data.reasoning,
|
|
1930
2043
|
metadata: {
|
|
1931
|
-
model:
|
|
2044
|
+
model: this.provider.label,
|
|
1932
2045
|
processingTimeMs: latencyMs
|
|
1933
2046
|
},
|
|
1934
2047
|
_internal: response.data
|
|
@@ -1938,7 +2051,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1938
2051
|
latencyMs,
|
|
1939
2052
|
textLength: text.length,
|
|
1940
2053
|
grade,
|
|
1941
|
-
provider:
|
|
2054
|
+
provider: this.provider.label,
|
|
1942
2055
|
tokenUsage: totalTokenUsage,
|
|
1943
2056
|
metadata: {
|
|
1944
2057
|
stage_details: stageDetails
|
|
@@ -1973,7 +2086,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1973
2086
|
latencyMs,
|
|
1974
2087
|
textLength: text.length,
|
|
1975
2088
|
grade,
|
|
1976
|
-
provider:
|
|
2089
|
+
provider: this.provider.label,
|
|
1977
2090
|
tokenUsage: totalTokenUsage,
|
|
1978
2091
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1979
2092
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -2077,18 +2190,16 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2077
2190
|
name: "Conventionality",
|
|
2078
2191
|
description: "Evaluates how explicit, literal, and straightforward a text's meaning is relative to grade level",
|
|
2079
2192
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
2080
|
-
|
|
2081
|
-
requiresOpenAIKey: false
|
|
2193
|
+
defaultProviders: ["google" /* Google */]
|
|
2082
2194
|
};
|
|
2083
2195
|
provider;
|
|
2084
2196
|
constructor(config) {
|
|
2085
2197
|
super(config);
|
|
2086
|
-
this.provider =
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
|
|
2091
|
-
});
|
|
2198
|
+
this.provider = this.createConfiguredProvider(
|
|
2199
|
+
"google" /* Google */,
|
|
2200
|
+
"gemini-3-flash-preview",
|
|
2201
|
+
config.googleApiKey
|
|
2202
|
+
);
|
|
2092
2203
|
}
|
|
2093
2204
|
/**
|
|
2094
2205
|
* Evaluate conventionality complexity for a given text and grade level
|
|
@@ -2097,6 +2208,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2097
2208
|
* @param grade - The target grade level (3-12)
|
|
2098
2209
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
2099
2210
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
2211
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
2100
2212
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
2101
2213
|
*/
|
|
2102
2214
|
async evaluate(text, grade) {
|
|
@@ -2119,7 +2231,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2119
2231
|
const response = await this.evaluateConventionality(text, grade, fkScore);
|
|
2120
2232
|
stageDetails.push({
|
|
2121
2233
|
stage: "conventionality_evaluation",
|
|
2122
|
-
provider:
|
|
2234
|
+
provider: this.provider.label,
|
|
2123
2235
|
latency_ms: response.latencyMs,
|
|
2124
2236
|
token_usage: {
|
|
2125
2237
|
input_tokens: response.usage.inputTokens,
|
|
@@ -2135,7 +2247,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2135
2247
|
score: response.data.complexity_score,
|
|
2136
2248
|
reasoning: response.data.reasoning,
|
|
2137
2249
|
metadata: {
|
|
2138
|
-
model:
|
|
2250
|
+
model: this.provider.label,
|
|
2139
2251
|
processingTimeMs: latencyMs
|
|
2140
2252
|
},
|
|
2141
2253
|
_internal: response.data
|
|
@@ -2145,7 +2257,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2145
2257
|
latencyMs,
|
|
2146
2258
|
textLength: text.length,
|
|
2147
2259
|
grade,
|
|
2148
|
-
provider:
|
|
2260
|
+
provider: this.provider.label,
|
|
2149
2261
|
tokenUsage: totalTokenUsage,
|
|
2150
2262
|
metadata: {
|
|
2151
2263
|
stage_details: stageDetails
|
|
@@ -2180,7 +2292,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2180
2292
|
latencyMs,
|
|
2181
2293
|
textLength: text.length,
|
|
2182
2294
|
grade,
|
|
2183
|
-
provider:
|
|
2295
|
+
provider: this.provider.label,
|
|
2184
2296
|
tokenUsage: totalTokenUsage,
|
|
2185
2297
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
2186
2298
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -2212,6 +2324,276 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2212
2324
|
};
|
|
2213
2325
|
}
|
|
2214
2326
|
};
|
|
2327
|
+
// Structured-output schema for the Purpose evaluator's LLM response.
// Every object is strict, so keys outside the ones listed here are rejected.
var PurposeOutputSchema = (() => {
  const { z } = zod;

  // One identified complexity factor and its effect on the reader.
  const complexityFactor = z.object({
    factor: z.string().describe("The specific text complexity factor identified."),
    description: z.string().describe("How this factor manifests in the text."),
    effect_on_complexity_dimension: z.string().describe("How this factor affects the reader's ability to understand the text's specific complexity dimension.")
  }).strict();

  // One scaffolding need paired with an instructional strategy.
  const scaffoldingItem = z.object({
    scaffolding_need: z.string().describe("The complexity factor that requires scaffolding."),
    suggestion: z.string().describe("A specific instructional strategy to support students with this factor.")
  }).strict();

  // One additional instructional opportunity for the text.
  const useCase = z.object({
    opportunity: z.string().describe("An instructional opportunity related to the text."),
    suggestion: z.string().describe("A specific way to leverage this text for that instructional purpose.")
  }).strict();

  const details = z.object({
    detailed_summary: z.array(complexityFactor).describe("Individual complexity factors with descriptions and their effects."),
    adjustment_and_scaffolding: z.array(scaffoldingItem).describe("Scaffolding strategies to make the text accessible at the target grade."),
    recommended_use_cases: z.array(useCase).describe("Additional instructional opportunities for using this text.")
  }).strict().describe("Practical instructional details including scaffolding strategies and recommended use cases.");

  return z.object({
    complexity_score: z.enum([
      "slightly_complex",
      "moderately_complex",
      "very_complex",
      "exceedingly_complex",
      "more_context_needed"
    ]).describe("The Purpose complexity level for the target grade."),
    reasoning: z.string().describe("A high-level summary of why the text is at this complexity level for the target grade."),
    details
  }).strict();
})();
|
|
2328
|
+
|
|
2329
|
+
// ../../evals/prompts/purpose/system.txt
|
|
2330
|
+
var system_default4 = '\n Role\n You are an expert reading assessment evaluator. Your task is to determine the Text Complexity of a given passage based exclusively on the Purpose dimension of the qualitative measures rubric.\n\n Task Details\n You will be provided with an informational or literary `text`, along with its `grade_level` and `fk_score` (Flesch-Kincaid). You must analyze the text and determine how difficult it is for a reader to identify the author\'s purpose. \n\n Crucially, you must distinguish between the text\'s *topic* (what it is about) and its *purpose* (why the author wrote it). \n\n Rubric: Purpose Complexity\n Exceedingly Complex: Subtle and intricate, difficult to determine; includes many theoretical or abstract elements.\n Very Complex: Implicit or subtle but fairly easy to infer; more theoretical or abstract than concrete.\n Moderately Complex: Implied but easy to identify based upon context or source.\n Slightly Complex: Explicitly stated, clear, concrete, narrowly focused.\n More Context Needed: The text is a fragment or lacks necessary introductory context, making the true purpose impossible to determine accurately without external background knowledge.\n\n Expert Rules for Evaluating Purpose\n Based on expert consensus and historical grading corrections, you must apply the following heuristics:\n\n 1. The "Slightly Complex" Benchmark (Straightforward and Explicit)\n A text is Slightly Complex if its purpose is explicitly stated or if its informative intent is straightforward, clear, concrete, and directly answers what the text is immediately about. If the text opens by clearly identifying a concrete topic (e.g., "Pins are made of either brass or iron wire") and rigidly follows through by explaining factual, practical information or a process (like manufacturing steps or geographic facts), the purpose is considered explicit and straightforward. It does *not* require a literal statement like "The purpose of this text is to..." 
as long as the delivery of information is direct, clear, and unadorned by persuasive elements or complex framing.\n\n 2. Moderately Complex via Guiding Questions & Inquiry Formats\n If a text begins with a general introduction and uses guiding questions (e.g., "Have you ever wondered how clouds are formed?") to transition into an explanation, the purpose is implied rather than explicitly stated upfront. Because the reader must recognize the question as the pivot point for the author\'s intent, it is Moderately Complex.\n\n 3. Moderately Complex via Multiple Distinct Informational Goals\n If a text covers a broad topic but jumps between several distinct scientific or informational objectives without an overarching framing device or explicit thesis (e.g., talking about measuring ice sheets, then mapping, then finding meteorites), the reader must synthesize these diverse facts to recognize the broader purpose, making it Moderately Complex.\n\n 4. Moderately Complex via Arguments Disguised as Information\n If an author is arguing a specific point, correcting a misconception, or defending a stance, but the text could initially be mistaken by students as purely informative factual text, it is Moderately Complex. The reader must infer the persuasive intent or argumentative purpose beneath the informative tone.\n\n 5. "More Context Needed" for Fragments\n If a text is a fragment missing a crucial introduction or context, and identifying the author\'s purpose beyond a simple surface-level description would be exceptionally difficult for a reader in the target grade level without that external background, score it as `more_context_needed`. 
\n\n Output Format\n Provide your evaluation in the following structure:\n reasoning:\n - Surface Analysis: Identify if the text clearly identifies its topic and delivers straightforward facts, or if it utilizes structural cues, titles, or direct thesis statements.\n - Subtlety & Framing: Is the informative purpose straightforward and concrete? Does it use guiding questions? Is it an argument disguised as pure information? Are there multiple distinct informational goals requiring synthesis?\n - Context Check: Is this text a fragment missing crucial context that obscures the deeper purpose for the target grade level?\n - Rubric Alignment: Explain how the text aligns with the specific language of the rubric, explicitly referencing the expert rules above. Justify why it isn\'t one level simpler or more complex.\n\n answer:\n - complexity_score: (slightly_complex, moderately_complex, very_complex, exceedingly_complex, more_context_needed)\n - reasoning: A brief summary of your final decision.\n - details: Structured breakdown of PurposeDetails including detailed_summary, adjustment_and_scaffolding, and recommended_use_cases.\n';
|
|
2331
|
+
|
|
2332
|
+
// ../../evals/prompts/purpose/user.txt
|
|
2333
|
+
// User-message template for the Purpose evaluator. The {text}, {grade_level}
// and {fk_score} tokens are the placeholders declared in the purpose config.
var user_default4 = [
  "Analyze:",
  "Text: {text}",
  "Grade: {grade_level}",
  "FK Score: {fk_score}"
].join("\n");
|
|
2334
|
+
|
|
2335
|
+
// ../../evals/prompts/purpose/config.json
|
|
2336
|
+
// Inlined copy of the Purpose evaluator's config.json: evaluator identity,
// the preprocessing steps that feed the prompt, and the single LLM step.
var config_default = {
  evaluator: {
    id: "literacy.gla.purpose",
    name: "Purpose Dimension Text Complexity Evaluator",
    description: "Evaluates the Purpose dimension of qualitative text complexity for K-12 reading assessment, producing a 5-level rubric rating with structured pedagogical detail."
  },

  // Values computed from the input text before the prompt is rendered.
  preprocessing: [
    {
      id: "fk_score",
      kind: "flesch_kincaid_grade",
      description: "Compute the Flesch-Kincaid Grade Level for the input text and bind it to {fk_score} in the prompt.",
      input: "text",
      output: "fk_score",
      implementation: {
        python: {
          library: "textstat",
          function: "flesch_kincaid_grade",
          post_transform: { type: "round", precision: 2 }
        },
        typescript: {
          library: "text-readability",
          function: "fleschKincaidGrade",
          post_transform: { type: "round", precision: 2 }
        }
      }
    }
  ],

  // LLM steps; the Purpose evaluator has exactly one.
  steps: [
    {
      id: "evaluate_purpose",
      description: "Single-call LLM step that produces the EvaluatorOutput JSON.",
      prompt: {
        type: "chat",
        messages: [
          {
            role: "system",
            source_path: "system.txt",
            sha256: "745b95b7d54dc845b99363c9d3360355381883c22a5f6a0f305d7349cae38a54"
          },
          {
            role: "user",
            source_path: "user.txt",
            sha256: "cd8e6347db1a55d104e34436f8f66e833bd6583645d4786a554aaefdd26479b2"
          }
        ],
        // Key order matters: it is the order in which placeholder keys are listed.
        placeholders: {
          text: { required: true, source: "input" },
          grade_level: { required: true, source: "input" },
          fk_score: { required: true, source: "preprocessing.fk_score" }
        }
      },
      model: { provider: "google", name: "gemini-3-flash-preview" },
      generation: { temperature: 0 },
      parser: { kind: "structured_output" },
      output_binding: "formatted_output"
    }
  ]
};
|
|
2415
|
+
|
|
2416
|
+
// src/prompts/purpose/index.ts
|
|
2417
|
+
// The step id is "evaluate_" plus the last dot-separated segment of the evaluator id.
var evaluatorIdForPrompts = config_default.evaluator.id;
var STEP_ID = `evaluate_${evaluatorIdForPrompts.slice(evaluatorIdForPrompts.lastIndexOf(".") + 1)}`;
var _step = config_default.steps.find((candidate) => candidate.id === STEP_ID);
// Fail at module load if the config and the derived step id disagree.
if (!_step) {
  throw new Error(`Step "${STEP_ID}" not found in purpose config.json`);
}
// Placeholder names declared for the step's prompt, in declaration order.
var PLACEHOLDER_KEYS = Object.keys(_step.prompt.placeholders);
|
|
2421
|
+
/**
 * Substitute `{key}` tokens in a prompt template with values from `inputs`.
 *
 * Only keys listed in PLACEHOLDER_KEYS are considered. A listed key that is
 * not present in `inputs` is left in the template untouched.
 *
 * The substitution is done in a single pass with a replacer function, so:
 *  - inserted values are never re-scanned, which means a `{grade_level}` that
 *    happens to appear inside the user's text is not substituted again;
 *  - `$`-sequences in a value (`$&`, `$$`, `$1`, ...) are inserted literally
 *    instead of being interpreted as replacement patterns.
 *
 * @param {string} template - Prompt template containing `{key}` tokens.
 * @param {Object} inputs - Map of placeholder name to replacement value.
 * @returns {string} The template with known placeholders substituted.
 */
function applyPlaceholders(template, inputs) {
  if (PLACEHOLDER_KEYS.length === 0) {
    return template;
  }
  // Escape regex metacharacters so a key is always matched literally.
  const alternatives = PLACEHOLDER_KEYS
    .map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
    .join("|");
  const tokenPattern = new RegExp(`\\{(${alternatives})\\}`, "g");
  return template.replace(
    tokenPattern,
    (token, key) => (key in inputs ? String(inputs[key]) : token)
  );
}
|
|
2427
|
+
/**
 * Build the Purpose evaluator's system message.
 *
 * @param {Object} inputs - Placeholder values passed through to applyPlaceholders.
 * @returns {string} The system prompt template after placeholder substitution.
 */
function getSystemPrompt5(inputs) {
  const renderedSystemPrompt = applyPlaceholders(system_default4, inputs);
  return renderedSystemPrompt;
}
|
|
2430
|
+
/**
 * Build the Purpose evaluator's user message.
 *
 * @param {Object} inputs - Placeholder values passed through to applyPlaceholders.
 * @returns {string} The user prompt template after placeholder substitution.
 */
function getUserPrompt5(inputs) {
  const renderedUserPrompt = applyPlaceholders(user_default4, inputs);
  return renderedUserPrompt;
}
|
|
2433
|
+
|
|
2434
|
+
// ../../evals/prompts/purpose/input_schema.json
|
|
2435
|
+
// Subset of the Purpose input schema kept in the bundle: only the numeric
// bounds of grade_level, which are read to derive the supported grade range.
var input_schema_default = {
  properties: {
    grade_level: { minimum: 3, maximum: 12 }
  }
};
|
|
2442
|
+
|
|
2443
|
+
// src/evaluators/purpose.ts
|
|
2444
|
+
// The evaluator's step id is "evaluate_" plus the last dot-separated segment of its id.
var evaluatorIdForStep = config_default.evaluator.id;
var STEP_ID2 = `evaluate_${evaluatorIdForStep.slice(evaluatorIdForStep.lastIndexOf(".") + 1)}`;
var _step2 = config_default.steps.find((candidate) => candidate.id === STEP_ID2);
// Fail at module load if the config does not contain the derived step.
if (!_step2) {
  throw new Error(`Step "${STEP_ID2}" not found in purpose config.json`);
}
var STEP = _step2;

// Inclusive grade bounds come from the input schema.
var GRADE_MIN = input_schema_default.properties.grade_level.minimum;
var GRADE_MAX = input_schema_default.properties.grade_level.maximum;

// Every grade from GRADE_MIN to GRADE_MAX, as strings.
var SUPPORTED_GRADES = [];
for (let gradeValue = GRADE_MIN; gradeValue <= GRADE_MAX; gradeValue += 1) {
  SUPPORTED_GRADES.push(String(gradeValue));
}
|
|
2451
|
+
// Maps the schema's complexity_score enum values to the display labels
// returned as the evaluation `score`.
var COMPLEXITY_SCORE_DISPLAY = {
  slightly_complex: "Slightly complex",
  moderately_complex: "Moderately complex",
  very_complex: "Very complex",
  exceedingly_complex: "Exceedingly complex",
  more_context_needed: "More context needed"
};
|
|
2458
|
+
/**
 * Evaluates the Purpose dimension of text complexity with a single structured
 * LLM call. Identity, model name and temperature are read from the purpose
 * config; the supported grade range is read from the input schema.
 */
var PurposeEvaluator = class _PurposeEvaluator extends BaseEvaluator {
  static metadata = {
    id: config_default.evaluator.id,
    name: config_default.evaluator.name,
    description: config_default.evaluator.description,
    supportedGrades: SUPPORTED_GRADES,
    defaultProviders: ["google" /* Google */]
  };

  // Sampling temperature taken from the step's generation settings.
  static TEMPERATURE = STEP.generation.temperature;

  /**
   * Run the "fk_score" preprocessing step from the config on `text`.
   *
   * @param text - The text to score
   * @returns Whatever runPreprocessingStep returns for the step's TypeScript implementation
   * @throws {Error} If the config has no "fk_score" preprocessing step
   */
  static computeFkScore(text) {
    const fkStep = config_default.preprocessing.find((p) => p.id === "fk_score");
    if (!fkStep) throw new Error("fk_score preprocessing step not found in purpose config.json");
    return runPreprocessingStep(text, fkStep.implementation.typescript);
  }

  provider;

  constructor(config) {
    super(config);
    this.provider = this.createConfiguredProvider(
      "google" /* Google */,
      STEP.model.name,
      config.googleApiKey
    );
  }

  /**
   * Evaluate purpose complexity for a given text and grade level
   *
   * @param text - The text to evaluate
   * @param grade - The target grade level (3-12)
   * @returns Evaluation result with complexity score and detailed analysis
   * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
   * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
   * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
   */
  async evaluate(text, grade) {
    this.logger.info("Starting Purpose evaluation", {
      evaluator: _PurposeEvaluator.metadata.id,
      operation: "evaluate",
      grade,
      textLength: text.length
    });
    const startTime = Date.now();
    // Filled only after the LLM call returns; the error path uses it to report
    // any token usage that was already incurred.
    const stageDetails = [];
    try {
      this.validateText(text);
      const gradeNum = this.parseAndValidateGrade(grade);
      const fkScore = _PurposeEvaluator.computeFkScore(text);
      const inputs = {
        text,
        grade_level: String(gradeNum),
        fk_score: String(fkScore)
      };
      const response = await this.callLLM(inputs);
      const latencyMs = Date.now() - startTime;
      const tokenUsage = {
        input_tokens: response.usage.inputTokens,
        output_tokens: response.usage.outputTokens
      };
      stageDetails.push({
        stage: STEP.id,
        provider: this.provider.label,
        latency_ms: response.latencyMs,
        token_usage: tokenUsage
      });
      const result = {
        // Translate the enum value into its display label.
        score: COMPLEXITY_SCORE_DISPLAY[response.data.complexity_score],
        reasoning: response.data.reasoning,
        metadata: {
          model: this.provider.label,
          processingTimeMs: latencyMs
        },
        _internal: response.data
      };
      // Telemetry is best-effort: a rejected send must not fail the evaluation.
      this.sendTelemetry({
        status: "success",
        latencyMs,
        textLength: text.length,
        grade: String(gradeNum),
        provider: this.provider.label,
        tokenUsage,
        metadata: { stage_details: stageDetails },
        inputText: text
      }).catch(() => void 0);
      this.logger.info("Purpose evaluation completed successfully", {
        evaluator: _PurposeEvaluator.metadata.id,
        operation: "evaluate",
        grade: gradeNum,
        score: result.score,
        processingTimeMs: latencyMs
      });
      return result;
    } catch (error) {
      const latencyMs = Date.now() - startTime;
      this.logger.error("Purpose evaluation failed", {
        evaluator: _PurposeEvaluator.metadata.id,
        operation: "evaluate",
        grade,
        error: error instanceof Error ? error : void 0,
        processingTimeMs: latencyMs
      });
      const tokenUsage = stageDetails.length > 0 ? {
        input_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.input_tokens ?? 0), 0),
        output_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.output_tokens ?? 0), 0)
      } : void 0;
      this.sendTelemetry({
        status: "error",
        latencyMs,
        textLength: text.length,
        grade: String(grade),
        provider: this.provider.label,
        tokenUsage,
        errorCode: error instanceof Error ? error.name : "UnknownError",
        metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
        inputText: text
      }).catch(() => void 0);
      // Validation errors are rethrown as-is; everything else goes through wrapProviderError.
      if (error instanceof ValidationError) throw error;
      throw wrapProviderError(error, "Purpose evaluation failed");
    }
  }

  /**
   * Convert `grade` to an integer and check it against GRADE_MIN..GRADE_MAX.
   *
   * The value is coerced to a string before trimming, so a numeric grade is
   * accepted and a missing grade produces a ValidationError rather than a
   * TypeError from calling `.trim()` on a non-string.
   *
   * @param grade - Grade as a string (surrounding whitespace allowed) or number
   * @returns The grade as an integer
   * @throws {ValidationError} If the grade is missing, not an integer, or out of range
   */
  parseAndValidateGrade(grade) {
    const raw = grade == null ? "" : String(grade).trim();
    // Number("") is 0, which is only rejected because it is below the minimum;
    // treat empty input as invalid explicitly instead of relying on that.
    const num = raw === "" ? Number.NaN : Number(raw);
    if (!Number.isInteger(num) || num < GRADE_MIN || num > GRADE_MAX) {
      throw new ValidationError(
        `Invalid grade "${grade}". Purpose evaluator supports integer grades ${GRADE_MIN}\u2013${GRADE_MAX}.`
      );
    }
    return num;
  }

  /**
   * Send the rendered system and user prompts to the provider and request
   * output matching PurposeOutputSchema.
   *
   * @param inputs - Placeholder values (text, grade_level, fk_score)
   * @returns The provider response's `data`, `usage` and `latencyMs`
   */
  async callLLM(inputs) {
    const response = await this.provider.generateStructured({
      messages: [
        { role: "system", content: getSystemPrompt5(inputs) },
        { role: "user", content: getUserPrompt5(inputs) }
      ],
      schema: PurposeOutputSchema,
      temperature: _PurposeEvaluator.TEMPERATURE
    });
    return { data: response.data, usage: response.usage, latencyMs: response.latencyMs };
  }
};
|
|
2215
2597
|
|
|
2216
2598
|
// src/batch/evaluator.ts
|
|
2217
2599
|
var EVALUATOR_MAP = /* @__PURE__ */ new Map([
|
|
@@ -2219,19 +2601,21 @@ var EVALUATOR_MAP = /* @__PURE__ */ new Map([
|
|
|
2219
2601
|
[SmkEvaluator.metadata.id, SmkEvaluator],
|
|
2220
2602
|
[VocabularyEvaluator.metadata.id, VocabularyEvaluator],
|
|
2221
2603
|
[SentenceStructureEvaluator.metadata.id, SentenceStructureEvaluator],
|
|
2222
|
-
[ConventionalityEvaluator.metadata.id, ConventionalityEvaluator]
|
|
2604
|
+
[ConventionalityEvaluator.metadata.id, ConventionalityEvaluator],
|
|
2605
|
+
[PurposeEvaluator.metadata.id, PurposeEvaluator]
|
|
2223
2606
|
]);
|
|
2224
2607
|
var EVALUATOR_GROUPS = [
|
|
2225
2608
|
{
|
|
2226
2609
|
id: "text-complexity",
|
|
2227
2610
|
name: "Text Complexity Analysis",
|
|
2228
|
-
description: "Evaluates
|
|
2611
|
+
description: "Evaluates all dimensions of the Qualitative Text Complexity rubric",
|
|
2229
2612
|
evaluatorIds: [
|
|
2230
2613
|
GradeLevelAppropriatenessEvaluator.metadata.id,
|
|
2231
2614
|
SmkEvaluator.metadata.id,
|
|
2232
2615
|
VocabularyEvaluator.metadata.id,
|
|
2233
2616
|
SentenceStructureEvaluator.metadata.id,
|
|
2234
|
-
ConventionalityEvaluator.metadata.id
|
|
2617
|
+
ConventionalityEvaluator.metadata.id,
|
|
2618
|
+
PurposeEvaluator.metadata.id
|
|
2235
2619
|
],
|
|
2236
2620
|
requiresGoogleKey: true,
|
|
2237
2621
|
requiresOpenAIKey: true,
|
|
@@ -3322,6 +3706,8 @@ var COMPLEXITY_SCORE_MAP = {
|
|
|
3322
3706
|
"moderately complex": 2,
|
|
3323
3707
|
"very complex": 3,
|
|
3324
3708
|
"exceedingly complex": 4
|
|
3709
|
+
// 'more context needed' has no numeric equivalent — rows with this score appear as N/A
|
|
3710
|
+
// in individual results and are excluded from aggregate stats, same as failed evaluations.
|
|
3325
3711
|
};
|
|
3326
3712
|
function evaluatorDisplayName(id) {
|
|
3327
3713
|
return id.split("-").map((w) => w.charAt(0).toUpperCase() + w.slice(1)).join(" ");
|
|
@@ -3376,7 +3762,8 @@ function groupResultsByRow(results) {
|
|
|
3376
3762
|
return grouped;
|
|
3377
3763
|
}
|
|
3378
3764
|
/**
 * Derive a column prefix from an evaluator id: keep only the part after the
 * last "." (if any) and turn every "-" into "_".
 *
 * @param {string} evaluatorId - Evaluator id, dotted ("a.b.slug") or plain ("some-slug").
 * @returns {string} The slug with hyphens replaced by underscores.
 */
function formatEvaluatorPrefix(evaluatorId) {
  const lastDot = evaluatorId.lastIndexOf(".");
  const slug = lastDot === -1 ? evaluatorId : evaluatorId.slice(lastDot + 1);
  return slug.split("-").join("_");
}
|
|
3381
3768
|
function escapeCSV(field) {
|
|
3382
3769
|
if (field.includes(",") || field.includes('"') || field.includes("\n")) {
|