@learning-commons/evaluators 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/README.md +188 -45
- package/dist/{base-Ced9oKKa.d.cts → base-DKcAYXfb.d.cts} +142 -9
- package/dist/{base-Ced9oKKa.d.ts → base-DKcAYXfb.d.ts} +142 -9
- package/dist/batch/cli.js +635 -227
- package/dist/batch/cli.js.map +1 -1
- package/dist/batch/index.cjs +618 -218
- package/dist/batch/index.cjs.map +1 -1
- package/dist/batch/index.d.cts +3 -1
- package/dist/batch/index.d.ts +3 -1
- package/dist/batch/index.js +617 -218
- package/dist/batch/index.js.map +1 -1
- package/dist/index.cjs +626 -217
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +155 -86
- package/dist/index.d.ts +155 -86
- package/dist/index.js +622 -218
- package/dist/index.js.map +1 -1
- package/package.json +13 -4
- package/src/batch/README.md +14 -1
package/dist/batch/index.js
CHANGED
|
@@ -9,6 +9,7 @@ import { generateText, Output } from 'ai';
|
|
|
9
9
|
import { z } from 'zod';
|
|
10
10
|
import nlp from 'compromise';
|
|
11
11
|
import { syllable } from 'syllable';
|
|
12
|
+
import textReadability from 'text-readability';
|
|
12
13
|
import { parse } from 'csv-parse/sync';
|
|
13
14
|
|
|
14
15
|
// src/batch/evaluator.ts
|
|
@@ -172,8 +173,9 @@ var TimeoutError = class extends APIError {
|
|
|
172
173
|
function parseProviderError(error) {
|
|
173
174
|
if (error instanceof Error) {
|
|
174
175
|
const message = error.message;
|
|
176
|
+
const err = error;
|
|
175
177
|
const statusMatch = message.match(/\b(4\d{2}|5\d{2})\b/);
|
|
176
|
-
const statusCode = statusMatch ? parseInt(statusMatch[1]) : void 0;
|
|
178
|
+
const statusCode = err.statusCode ?? err.status ?? (statusMatch ? parseInt(statusMatch[1]) : void 0);
|
|
177
179
|
return {
|
|
178
180
|
message,
|
|
179
181
|
statusCode,
|
|
@@ -186,6 +188,11 @@ function parseProviderError(error) {
|
|
|
186
188
|
}
|
|
187
189
|
function wrapProviderError(error, defaultMessage = "API request failed") {
|
|
188
190
|
const { message, statusCode, code } = parseProviderError(error);
|
|
191
|
+
if (statusCode === 404 || statusCode === 400 && /\bmodel\b.*(not found|does not exist|invalid)/i.test(message)) {
|
|
192
|
+
return new ConfigurationError(
|
|
193
|
+
`Model not found or invalid: ${message}. Check the model ID passed to the provider.`
|
|
194
|
+
);
|
|
195
|
+
}
|
|
189
196
|
if (statusCode === 401 || statusCode === 403) {
|
|
190
197
|
return new AuthenticationError(
|
|
191
198
|
message.includes("API key") ? message : "Invalid API key",
|
|
@@ -260,6 +267,111 @@ function createLogger(customLogger, level = 2 /* WARN */) {
|
|
|
260
267
|
}
|
|
261
268
|
return new ConsoleLogger(level);
|
|
262
269
|
}
|
|
270
|
+
/**
 * LLM provider backed by the Vercel AI SDK.
 *
 * The provider is bound to a single model at construction time; there is no
 * per-request model selection and no implicit default model. Provider adapter
 * packages (`@ai-sdk/openai`, `@ai-sdk/anthropic`, `@ai-sdk/google`) are loaded
 * lazily so consumers only need to install the ones they use.
 */
var VercelAIProvider = class {
  /** Identifier in the form `<type>:<model>`, e.g. `openai:gpt-4o`. */
  label;
  /** Model ID handed to the provider adapter. */
  model;

  /**
   * @param config - Provider configuration (`type`, `model`, optional `apiKey`,
   *   `maxRetries`, `temperature`)
   * @throws {Error} If `config.type` is "custom" or `config.model` is missing/blank
   */
  constructor(config) {
    this.config = config;
    if (config.type === "custom") {
      throw new Error(
        "VercelAIProvider does not support custom type. Use config.customProvider directly."
      );
    }
    if (!config.model || config.model.trim() === "") {
      throw new Error(
        `model is required for VercelAIProvider (type: "${config.type}"). No default is assumed.`
      );
    }
    this.model = config.model;
    this.label = `${config.type}:${config.model}`;
  }

  /**
   * Generate structured output using Vercel AI SDK's generateText with output.
   *
   * @param request - `messages`, `schema`, and optional `temperature` / `maxTokens`
   * @returns The parsed object (`data`), the model ID, token usage and latency in ms
   */
  async generateStructured(request) {
    const model = await this.getModel();
    const startTime = Date.now();
    const { output, usage } = await generateText({
      model,
      messages: request.messages,
      output: Output.object({ schema: request.schema }),
      // Same fallback chain as generateText() below: request, then config, then 0.
      temperature: request.temperature ?? this.config.temperature ?? 0,
      maxRetries: this.config.maxRetries ?? 0,
      // The SDK version this code targets (usage.inputTokens / result.output)
      // names the cap `maxOutputTokens`; a `maxTokens` key would be ignored.
      ...(request.maxTokens !== void 0 ? { maxOutputTokens: request.maxTokens } : {})
    });
    return {
      data: output,
      model: this.model,
      usage: {
        inputTokens: usage.inputTokens || 0,
        outputTokens: usage.outputTokens || 0
      },
      latencyMs: Date.now() - startTime
    };
  }

  /**
   * Generate plain text using Vercel AI SDK's generateText.
   *
   * @param messages - Chat messages to send
   * @param temperature - Optional override; falls back to `config.temperature`, then 0
   * @returns The generated text, token usage and latency in ms
   */
  async generateText(messages, temperature) {
    const model = await this.getModel();
    const startTime = Date.now();
    const { text, usage } = await generateText({
      model,
      messages,
      temperature: temperature ?? this.config.temperature ?? 0,
      maxRetries: this.config.maxRetries ?? 0
    });
    return {
      text,
      usage: {
        inputTokens: usage.inputTokens || 0,
        outputTokens: usage.outputTokens || 0
      },
      latencyMs: Date.now() - startTime
    };
  }

  /**
   * Get the configured language model.
   * Uses dynamic imports so consumers only need to install the provider packages they use.
   *
   * @throws {Error} If the adapter package cannot be loaded (original error is
   *   attached as `cause`) or the provider type is not supported
   */
  async getModel() {
    const { apiKey } = this.config;
    // Only pass apiKey when set so the adapter can fall back to its own defaults.
    const settings = apiKey ? { apiKey } : {};
    switch (this.config.type) {
      case "openai": {
        const { createOpenAI } = await import('@ai-sdk/openai').catch((cause) => {
          throw new Error(
            "To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai",
            { cause }
          );
        });
        return createOpenAI(settings)(this.model);
      }
      case "anthropic": {
        const { createAnthropic } = await import('@ai-sdk/anthropic').catch((cause) => {
          throw new Error(
            "To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic",
            { cause }
          );
        });
        return createAnthropic(settings)(this.model);
      }
      case "google": {
        const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch((cause) => {
          throw new Error(
            "To use the Google provider, install its adapter: npm install @ai-sdk/google",
            { cause }
          );
        });
        return createGoogleGenerativeAI(settings)(this.model);
      }
      default:
        throw new Error(`Unsupported provider type: ${this.config.type}`);
    }
  }
};
|
|
369
|
+
/**
 * Resolve the LLM provider for a configuration.
 *
 * A caller-supplied `customProvider` is returned as-is when `type` is "custom";
 * every other configuration is wrapped in a VercelAIProvider.
 *
 * @param config - Provider configuration
 * @returns The custom provider instance or a new VercelAIProvider
 */
function createProvider(config) {
  const hasCustomProvider = config.type === "custom" && Boolean(config.customProvider);
  return hasCustomProvider ? config.customProvider : new VercelAIProvider(config);
}
|
|
263
375
|
|
|
264
376
|
// src/evaluators/base.ts
|
|
265
377
|
var VALIDATION_LIMITS = {
|
|
@@ -268,6 +380,12 @@ var VALIDATION_LIMITS = {
|
|
|
268
380
|
/** Maximum text length in characters (100K chars ≈ 25K tokens) */
|
|
269
381
|
MAX_TEXT_LENGTH: 1e5
|
|
270
382
|
};
|
|
383
|
+
/**
 * Supported LLM provider identifiers.
 * Key order matters: it is the order in which Object.values(Provider) lists them.
 */
var Provider = {
  OpenAI: "openai",
  Google: "google",
  Anthropic: "anthropic"
};
|
|
271
389
|
var BaseEvaluator = class {
|
|
272
390
|
telemetryClient;
|
|
273
391
|
logger;
|
|
@@ -285,21 +403,35 @@ var BaseEvaluator = class {
|
|
|
285
403
|
* name: 'My Evaluator',
|
|
286
404
|
* description: 'Does something useful',
|
|
287
405
|
* supportedGrades: ['3', '4', '5'],
|
|
288
|
-
*
|
|
289
|
-
* requiresOpenAIKey: false,
|
|
406
|
+
* defaultProviders: [Provider.Google],
|
|
290
407
|
* };
|
|
291
408
|
* }
|
|
292
409
|
* ```
|
|
293
410
|
*/
|
|
294
411
|
static metadata;
|
|
412
|
+
/**
|
|
413
|
+
* @throws {ConfigurationError} If the subclass has not defined static metadata
|
|
414
|
+
* @throws {ConfigurationError} If modelOverride has an invalid provider or empty model
|
|
415
|
+
* @throws {ConfigurationError} If a required API key is missing
|
|
416
|
+
*/
|
|
295
417
|
constructor(config) {
|
|
296
418
|
this.logger = createLogger(config.logger, config.logLevel ?? 2 /* WARN */);
|
|
419
|
+
this.validateModelOverride(config);
|
|
297
420
|
this.validateApiKeys(config);
|
|
298
421
|
const telemetryConfig = this.normalizeTelemetryConfig(config.telemetry);
|
|
299
422
|
this.config = {
|
|
300
423
|
maxRetries: config.maxRetries ?? 2,
|
|
301
|
-
telemetry: telemetryConfig
|
|
424
|
+
telemetry: telemetryConfig,
|
|
425
|
+
modelOverride: config.modelOverride,
|
|
426
|
+
googleApiKey: config.googleApiKey,
|
|
427
|
+
openaiApiKey: config.openaiApiKey,
|
|
428
|
+
anthropicApiKey: config.anthropicApiKey
|
|
302
429
|
};
|
|
430
|
+
if (config.modelOverride) {
|
|
431
|
+
this.logger.warn(
|
|
432
|
+
`modelOverride is active: using ${config.modelOverride.provider}:${config.modelOverride.model} instead of the default model. Evaluation quality may differ from recommended defaults.`
|
|
433
|
+
);
|
|
434
|
+
}
|
|
303
435
|
if (this.config.telemetry.enabled) {
|
|
304
436
|
this.telemetryClient = new TelemetryClient({
|
|
305
437
|
endpoint: "https://api.learningcommons.org/evaluators-telemetry/v1/events",
|
|
@@ -324,21 +456,62 @@ var BaseEvaluator = class {
|
|
|
324
456
|
return meta;
|
|
325
457
|
}
|
|
326
458
|
/**
|
|
327
|
-
* Validate
|
|
328
|
-
*
|
|
459
|
+
* Validate modelOverride shape: provider must be a known Provider value and
|
|
460
|
+
* model must be a non-empty string.
|
|
461
|
+
* @throws {ConfigurationError} If the override is malformed
|
|
329
462
|
*/
|
|
330
|
-
|
|
331
|
-
if (
|
|
463
|
+
validateModelOverride(config) {
|
|
464
|
+
if (!config.modelOverride) return;
|
|
465
|
+
const validProviders = Object.values(Provider);
|
|
466
|
+
if (!validProviders.includes(config.modelOverride.provider)) {
|
|
332
467
|
throw new ConfigurationError(
|
|
333
|
-
`
|
|
468
|
+
`Invalid provider "${config.modelOverride.provider}" in modelOverride. Valid providers are: ${validProviders.join(", ")}.`
|
|
334
469
|
);
|
|
335
470
|
}
|
|
336
|
-
if (
|
|
471
|
+
if (!config.modelOverride.model || config.modelOverride.model.trim() === "") {
|
|
337
472
|
throw new ConfigurationError(
|
|
338
|
-
`
|
|
473
|
+
`modelOverride.model is required. Specify the model ID for provider "${config.modelOverride.provider}".`
|
|
339
474
|
);
|
|
340
475
|
}
|
|
341
476
|
}
|
|
477
|
+
/**
|
|
478
|
+
* Validate that the required API key is present.
|
|
479
|
+
* When modelOverride is set, checks the override provider's key.
|
|
480
|
+
* Otherwise checks the keys required by the evaluator's default providers.
|
|
481
|
+
* @throws {ConfigurationError} If a required key is missing
|
|
482
|
+
*/
|
|
483
|
+
validateApiKeys(config) {
|
|
484
|
+
const keyFor = {
|
|
485
|
+
["openai" /* OpenAI */]: config.openaiApiKey?.trim() || void 0,
|
|
486
|
+
["google" /* Google */]: config.googleApiKey?.trim() || void 0,
|
|
487
|
+
["anthropic" /* Anthropic */]: config.anthropicApiKey?.trim() || void 0
|
|
488
|
+
};
|
|
489
|
+
const humanName = {
|
|
490
|
+
["openai" /* OpenAI */]: "OpenAI API key",
|
|
491
|
+
["google" /* Google */]: "Google API key",
|
|
492
|
+
["anthropic" /* Anthropic */]: "Anthropic API key"
|
|
493
|
+
};
|
|
494
|
+
const configKey = {
|
|
495
|
+
["openai" /* OpenAI */]: "openaiApiKey",
|
|
496
|
+
["google" /* Google */]: "googleApiKey",
|
|
497
|
+
["anthropic" /* Anthropic */]: "anthropicApiKey"
|
|
498
|
+
};
|
|
499
|
+
if (config.modelOverride) {
|
|
500
|
+
if (!keyFor[config.modelOverride.provider]) {
|
|
501
|
+
throw new ConfigurationError(
|
|
502
|
+
`${humanName[config.modelOverride.provider]} is required when using modelOverride with provider "${config.modelOverride.provider}". Pass ${configKey[config.modelOverride.provider]} in config.`
|
|
503
|
+
);
|
|
504
|
+
}
|
|
505
|
+
return;
|
|
506
|
+
}
|
|
507
|
+
for (const provider of this.metadata.defaultProviders) {
|
|
508
|
+
if (!keyFor[provider]) {
|
|
509
|
+
throw new ConfigurationError(
|
|
510
|
+
`${humanName[provider]} is required for ${this.metadata.name} evaluator. Pass ${configKey[provider]} in config.`
|
|
511
|
+
);
|
|
512
|
+
}
|
|
513
|
+
}
|
|
514
|
+
}
|
|
342
515
|
/**
|
|
343
516
|
* Normalize telemetry config to standard format
|
|
344
517
|
*/
|
|
@@ -419,6 +592,33 @@ var BaseEvaluator = class {
|
|
|
419
592
|
);
|
|
420
593
|
}
|
|
421
594
|
}
|
|
595
|
+
/**
|
|
596
|
+
* Create an LLM provider, honouring modelOverride if set.
|
|
597
|
+
* When override is active, the key for the override provider is resolved
|
|
598
|
+
* from the matching top-level config field (e.g. anthropicApiKey for Anthropic).
|
|
599
|
+
*/
|
|
600
|
+
createConfiguredProvider(defaultType, defaultModel, defaultApiKey) {
|
|
601
|
+
const override = this.config.modelOverride;
|
|
602
|
+
if (override) {
|
|
603
|
+
const apiKeyFor = {
|
|
604
|
+
["openai" /* OpenAI */]: this.config.openaiApiKey,
|
|
605
|
+
["google" /* Google */]: this.config.googleApiKey,
|
|
606
|
+
["anthropic" /* Anthropic */]: this.config.anthropicApiKey
|
|
607
|
+
};
|
|
608
|
+
return createProvider({
|
|
609
|
+
type: override.provider,
|
|
610
|
+
model: override.model,
|
|
611
|
+
apiKey: apiKeyFor[override.provider],
|
|
612
|
+
maxRetries: this.config.maxRetries
|
|
613
|
+
});
|
|
614
|
+
}
|
|
615
|
+
return createProvider({
|
|
616
|
+
type: defaultType,
|
|
617
|
+
model: defaultModel,
|
|
618
|
+
apiKey: defaultApiKey,
|
|
619
|
+
maxRetries: this.config.maxRetries
|
|
620
|
+
});
|
|
621
|
+
}
|
|
422
622
|
/**
|
|
423
623
|
* Send telemetry event to analytics service
|
|
424
624
|
* Common helper for all evaluators
|
|
@@ -439,123 +639,12 @@ var BaseEvaluator = class {
|
|
|
439
639
|
provider: params.provider,
|
|
440
640
|
token_usage: params.tokenUsage,
|
|
441
641
|
metadata: params.metadata,
|
|
642
|
+
model_override: this.config.modelOverride ? true : void 0,
|
|
442
643
|
// Include input text only if recording is enabled
|
|
443
644
|
input_text: this.config.telemetry.recordInputs ? params.inputText : void 0
|
|
444
645
|
});
|
|
445
646
|
}
|
|
446
647
|
};
|
|
447
|
-
var DEFAULT_MODELS = {
|
|
448
|
-
openai: "gpt-4o",
|
|
449
|
-
anthropic: "claude-sonnet-4-5-20250929",
|
|
450
|
-
google: "gemini-2.5-pro"
|
|
451
|
-
};
|
|
452
|
-
var VercelAIProvider = class {
|
|
453
|
-
constructor(config) {
|
|
454
|
-
this.config = config;
|
|
455
|
-
if (config.type === "custom") {
|
|
456
|
-
throw new Error(
|
|
457
|
-
"VercelAIProvider does not support custom type. Use config.customProvider directly."
|
|
458
|
-
);
|
|
459
|
-
}
|
|
460
|
-
}
|
|
461
|
-
/**
|
|
462
|
-
* Generate structured output using Vercel AI SDK's generateText with output
|
|
463
|
-
*/
|
|
464
|
-
async generateStructured(request) {
|
|
465
|
-
const model = await this.getModel(request.model);
|
|
466
|
-
const startTime = Date.now();
|
|
467
|
-
const { output, usage } = await generateText({
|
|
468
|
-
model,
|
|
469
|
-
messages: request.messages,
|
|
470
|
-
output: Output.object({ schema: request.schema }),
|
|
471
|
-
temperature: request.temperature ?? 0,
|
|
472
|
-
maxRetries: this.config.maxRetries ?? 0,
|
|
473
|
-
...request.maxTokens !== void 0 ? { maxTokens: request.maxTokens } : {}
|
|
474
|
-
});
|
|
475
|
-
return {
|
|
476
|
-
data: output,
|
|
477
|
-
model: request.model || this.getDefaultModel(),
|
|
478
|
-
usage: {
|
|
479
|
-
inputTokens: usage.inputTokens || 0,
|
|
480
|
-
outputTokens: usage.outputTokens || 0
|
|
481
|
-
},
|
|
482
|
-
latencyMs: Date.now() - startTime
|
|
483
|
-
};
|
|
484
|
-
}
|
|
485
|
-
/**
|
|
486
|
-
* Generate plain text using Vercel AI SDK's generateText
|
|
487
|
-
*/
|
|
488
|
-
async generateText(messages, temperature) {
|
|
489
|
-
const model = await this.getModel();
|
|
490
|
-
const startTime = Date.now();
|
|
491
|
-
const { text, usage } = await generateText({
|
|
492
|
-
model,
|
|
493
|
-
messages,
|
|
494
|
-
temperature: temperature ?? this.config.temperature ?? 0,
|
|
495
|
-
maxRetries: this.config.maxRetries ?? 0
|
|
496
|
-
});
|
|
497
|
-
return {
|
|
498
|
-
text,
|
|
499
|
-
usage: {
|
|
500
|
-
inputTokens: usage.inputTokens || 0,
|
|
501
|
-
outputTokens: usage.outputTokens || 0
|
|
502
|
-
},
|
|
503
|
-
latencyMs: Date.now() - startTime
|
|
504
|
-
};
|
|
505
|
-
}
|
|
506
|
-
/**
|
|
507
|
-
* Get the configured language model.
|
|
508
|
-
* Uses dynamic imports so consumers only need to install the provider packages they use.
|
|
509
|
-
*/
|
|
510
|
-
async getModel(requestModel) {
|
|
511
|
-
const modelId = requestModel || this.config.model || this.getDefaultModel();
|
|
512
|
-
const apiKey = this.config.apiKey;
|
|
513
|
-
switch (this.config.type) {
|
|
514
|
-
case "openai": {
|
|
515
|
-
const { createOpenAI } = await import('@ai-sdk/openai').catch(() => {
|
|
516
|
-
throw new Error(
|
|
517
|
-
"To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai"
|
|
518
|
-
);
|
|
519
|
-
});
|
|
520
|
-
return createOpenAI(apiKey ? { apiKey } : {})(modelId);
|
|
521
|
-
}
|
|
522
|
-
case "anthropic": {
|
|
523
|
-
const { createAnthropic } = await import('@ai-sdk/anthropic').catch(() => {
|
|
524
|
-
throw new Error(
|
|
525
|
-
"To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic"
|
|
526
|
-
);
|
|
527
|
-
});
|
|
528
|
-
return createAnthropic(apiKey ? { apiKey } : {})(modelId);
|
|
529
|
-
}
|
|
530
|
-
case "google": {
|
|
531
|
-
const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch(() => {
|
|
532
|
-
throw new Error(
|
|
533
|
-
"To use the Google provider, install its adapter: npm install @ai-sdk/google"
|
|
534
|
-
);
|
|
535
|
-
});
|
|
536
|
-
return createGoogleGenerativeAI(apiKey ? { apiKey } : {})(modelId);
|
|
537
|
-
}
|
|
538
|
-
default:
|
|
539
|
-
throw new Error(`Unsupported provider type: ${this.config.type}`);
|
|
540
|
-
}
|
|
541
|
-
}
|
|
542
|
-
/**
|
|
543
|
-
* Get default model for the configured provider
|
|
544
|
-
*/
|
|
545
|
-
getDefaultModel() {
|
|
546
|
-
const providerType = this.config.type;
|
|
547
|
-
if (providerType === "custom") {
|
|
548
|
-
throw new Error("Cannot get default model for custom provider type");
|
|
549
|
-
}
|
|
550
|
-
return DEFAULT_MODELS[providerType];
|
|
551
|
-
}
|
|
552
|
-
};
|
|
553
|
-
function createProvider(config) {
|
|
554
|
-
if (config.type === "custom" && config.customProvider) {
|
|
555
|
-
return config.customProvider;
|
|
556
|
-
}
|
|
557
|
-
return new VercelAIProvider(config);
|
|
558
|
-
}
|
|
559
648
|
var TextComplexityLevel = z.enum([
|
|
560
649
|
"Slightly complex",
|
|
561
650
|
"Moderately complex",
|
|
@@ -757,6 +846,44 @@ function featuresToJSON(features, decimals = 1, castToInt = true) {
|
|
|
757
846
|
}
|
|
758
847
|
return JSON.stringify(payload, null, 2);
|
|
759
848
|
}
|
|
849
|
+
/**
 * Adapters for the libraries a preprocessing step may reference, keyed by
 * library name. Each adapter exposes call(fnName, text), which invokes the
 * named library function on the text.
 */
var LIBRARY_ADAPTERS = {
  "text-readability": {
    /**
     * Invoke a text-readability function by name with the library object as `this`.
     * @throws {Error} If the name does not resolve to a function
     */
    call(fnName, text) {
      const candidate = textReadability[fnName];
      if (typeof candidate === "function") {
        return candidate.call(textReadability, text);
      }
      throw new Error(`Function "${fnName}" not found in text-readability.`);
    }
  }
};
|
|
860
|
+
/**
 * Transforms that can be applied to a preprocessing result, keyed by the
 * `type` of the step's post_transform.
 */
var POST_TRANSFORMS = {
  /**
   * Round a number to `precision` decimal places using Math.round.
   *
   * @param value - Number to round
   * @param options - Optional settings; `precision` defaults to 0. The object
   *   itself is optional so round(value) rounds to an integer instead of
   *   throwing on destructuring undefined.
   * @returns The rounded number
   */
  round(value, { precision = 0 } = {}) {
    const factor = 10 ** precision;
    return Math.round(value * factor) / factor;
  }
};
|
|
866
|
+
/**
 * Run one preprocessing step: call the configured library function on the
 * text, then apply the optional post-transform to its result.
 *
 * @param text - Input text handed to the library function
 * @param impl - Step description: `library`, `function`, optional `post_transform`
 *   (an object whose `type` selects the transform and which is passed to it as options)
 * @returns The library result, transformed when a post_transform is given
 * @throws {Error} If the library or the post_transform type is not supported
 */
function runPreprocessingStep(text, impl) {
  // Own-property checks only: a plain `table[key]` lookup would also resolve
  // inherited names such as "toString" or "constructor" and skip the
  // "Unsupported ..." errors below.
  const hasOwn = (table, key) => Object.prototype.hasOwnProperty.call(table, key);

  if (!hasOwn(LIBRARY_ADAPTERS, impl.library)) {
    const supported = Object.keys(LIBRARY_ADAPTERS).join(", ");
    throw new Error(
      `Unsupported preprocessing library "${impl.library}". Supported: ${supported}.`
    );
  }
  const adapter = LIBRARY_ADAPTERS[impl.library];
  const rawResult = adapter.call(impl.function, text);

  const postTransform = impl.post_transform;
  if (!postTransform) {
    return rawResult;
  }
  if (!hasOwn(POST_TRANSFORMS, postTransform.type)) {
    const supported = Object.keys(POST_TRANSFORMS).join(", ");
    throw new Error(
      `Unsupported post_transform type "${postTransform.type}". Supported: ${supported}.`
    );
  }
  const transform = POST_TRANSFORMS[postTransform.type];
  return transform(rawResult, postTransform);
}
|
|
760
887
|
|
|
761
888
|
// ../../evals/prompts/vocabulary/background-knowledge.txt
|
|
762
889
|
var background_knowledge_default = `
|
|
@@ -1062,32 +1189,28 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1062
1189
|
name: "Vocabulary",
|
|
1063
1190
|
description: "Evaluates vocabulary complexity of educational texts relative to grade level",
|
|
1064
1191
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
1065
|
-
|
|
1066
|
-
requiresOpenAIKey: true
|
|
1192
|
+
defaultProviders: ["google" /* Google */, "openai" /* OpenAI */]
|
|
1067
1193
|
};
|
|
1068
1194
|
grades34ComplexityProvider;
|
|
1069
1195
|
otherGradesComplexityProvider;
|
|
1070
1196
|
backgroundKnowledgeProvider;
|
|
1071
1197
|
constructor(config) {
|
|
1072
1198
|
super(config);
|
|
1073
|
-
this.grades34ComplexityProvider =
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
apiKey: config.openaiApiKey,
|
|
1089
|
-
maxRetries: this.config.maxRetries
|
|
1090
|
-
});
|
|
1199
|
+
this.grades34ComplexityProvider = this.createConfiguredProvider(
|
|
1200
|
+
"google" /* Google */,
|
|
1201
|
+
"gemini-2.5-pro",
|
|
1202
|
+
config.googleApiKey
|
|
1203
|
+
);
|
|
1204
|
+
this.otherGradesComplexityProvider = this.createConfiguredProvider(
|
|
1205
|
+
"openai" /* OpenAI */,
|
|
1206
|
+
"gpt-4.1-2025-04-14",
|
|
1207
|
+
config.openaiApiKey
|
|
1208
|
+
);
|
|
1209
|
+
this.backgroundKnowledgeProvider = this.createConfiguredProvider(
|
|
1210
|
+
"openai" /* OpenAI */,
|
|
1211
|
+
"gpt-4o-2024-11-20",
|
|
1212
|
+
config.openaiApiKey
|
|
1213
|
+
);
|
|
1091
1214
|
}
|
|
1092
1215
|
/**
|
|
1093
1216
|
* Evaluate vocabulary complexity for a given text and grade level
|
|
@@ -1096,6 +1219,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1096
1219
|
* @param grade - The target grade level (3-12)
|
|
1097
1220
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
1098
1221
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
1222
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1099
1223
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1100
1224
|
*/
|
|
1101
1225
|
async evaluate(text, grade) {
|
|
@@ -1107,7 +1231,9 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1107
1231
|
});
|
|
1108
1232
|
const startTime = Date.now();
|
|
1109
1233
|
const stageDetails = [];
|
|
1110
|
-
const
|
|
1234
|
+
const complexityProviderLabel = grade === "3" || grade === "4" ? this.grades34ComplexityProvider.label : this.otherGradesComplexityProvider.label;
|
|
1235
|
+
const backgroundProviderLabel = this.backgroundKnowledgeProvider.label;
|
|
1236
|
+
const modelLabel = this.config.modelOverride ? backgroundProviderLabel : `${backgroundProviderLabel}+${complexityProviderLabel}`;
|
|
1111
1237
|
try {
|
|
1112
1238
|
this.validateText(text);
|
|
1113
1239
|
this.validateGrade(grade, new Set(_VocabularyEvaluator.metadata.supportedGrades));
|
|
@@ -1118,7 +1244,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1118
1244
|
const bgResponse = await this.getBackgroundKnowledgeAssumption(text, grade);
|
|
1119
1245
|
stageDetails.push({
|
|
1120
1246
|
stage: "background_knowledge",
|
|
1121
|
-
provider:
|
|
1247
|
+
provider: backgroundProviderLabel,
|
|
1122
1248
|
latency_ms: bgResponse.latencyMs,
|
|
1123
1249
|
token_usage: {
|
|
1124
1250
|
input_tokens: bgResponse.usage.inputTokens,
|
|
@@ -1134,7 +1260,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1134
1260
|
);
|
|
1135
1261
|
stageDetails.push({
|
|
1136
1262
|
stage: "complexity_evaluation",
|
|
1137
|
-
provider:
|
|
1263
|
+
provider: complexityProviderLabel,
|
|
1138
1264
|
latency_ms: complexityResponse.latencyMs,
|
|
1139
1265
|
token_usage: {
|
|
1140
1266
|
input_tokens: complexityResponse.usage.inputTokens,
|
|
@@ -1150,8 +1276,10 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1150
1276
|
score: complexityResponse.data.complexity_score,
|
|
1151
1277
|
reasoning: complexityResponse.data.reasoning,
|
|
1152
1278
|
metadata: {
|
|
1153
|
-
model:
|
|
1154
|
-
processingTimeMs: latencyMs
|
|
1279
|
+
model: modelLabel,
|
|
1280
|
+
processingTimeMs: latencyMs,
|
|
1281
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
1282
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
1155
1283
|
},
|
|
1156
1284
|
_internal: complexityResponse.data
|
|
1157
1285
|
};
|
|
@@ -1160,7 +1288,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1160
1288
|
latencyMs,
|
|
1161
1289
|
textLength: text.length,
|
|
1162
1290
|
grade,
|
|
1163
|
-
provider:
|
|
1291
|
+
provider: modelLabel,
|
|
1164
1292
|
tokenUsage: totalTokenUsage,
|
|
1165
1293
|
metadata: {
|
|
1166
1294
|
stage_details: stageDetails
|
|
@@ -1195,7 +1323,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1195
1323
|
latencyMs,
|
|
1196
1324
|
textLength: text.length,
|
|
1197
1325
|
grade,
|
|
1198
|
-
provider:
|
|
1326
|
+
provider: modelLabel,
|
|
1199
1327
|
tokenUsage: totalTokenUsage,
|
|
1200
1328
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1201
1329
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -1413,25 +1541,12 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1413
1541
|
name: "Sentence Structure",
|
|
1414
1542
|
description: "Evaluates sentence structure complexity based on grammatical features",
|
|
1415
1543
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
1416
|
-
|
|
1417
|
-
requiresOpenAIKey: true
|
|
1544
|
+
defaultProviders: ["openai" /* OpenAI */]
|
|
1418
1545
|
};
|
|
1419
|
-
|
|
1420
|
-
complexityProvider;
|
|
1546
|
+
provider;
|
|
1421
1547
|
constructor(config) {
|
|
1422
1548
|
super(config);
|
|
1423
|
-
this.
|
|
1424
|
-
type: "openai",
|
|
1425
|
-
model: "gpt-4o",
|
|
1426
|
-
apiKey: config.openaiApiKey,
|
|
1427
|
-
maxRetries: this.config.maxRetries
|
|
1428
|
-
});
|
|
1429
|
-
this.complexityProvider = createProvider({
|
|
1430
|
-
type: "openai",
|
|
1431
|
-
model: "gpt-4o",
|
|
1432
|
-
apiKey: config.openaiApiKey,
|
|
1433
|
-
maxRetries: this.config.maxRetries
|
|
1434
|
-
});
|
|
1549
|
+
this.provider = this.createConfiguredProvider("openai" /* OpenAI */, "gpt-4o", config.openaiApiKey);
|
|
1435
1550
|
}
|
|
1436
1551
|
/**
|
|
1437
1552
|
* Evaluate sentence structure complexity for a given text and grade level
|
|
@@ -1440,6 +1555,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1440
1555
|
* @param grade - The target grade level (3-12)
|
|
1441
1556
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
1442
1557
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
1558
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1443
1559
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1444
1560
|
*/
|
|
1445
1561
|
async evaluate(text, grade) {
|
|
@@ -1461,7 +1577,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1461
1577
|
const analysisResponse = await this.analyzeSentenceStructure(text);
|
|
1462
1578
|
stageDetails.push({
|
|
1463
1579
|
stage: "sentence_analysis",
|
|
1464
|
-
provider:
|
|
1580
|
+
provider: this.provider.label,
|
|
1465
1581
|
latency_ms: analysisResponse.latencyMs,
|
|
1466
1582
|
token_usage: {
|
|
1467
1583
|
input_tokens: analysisResponse.usage.inputTokens,
|
|
@@ -1476,7 +1592,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1476
1592
|
const complexityResponse = await this.classifyComplexity(features, grade, text);
|
|
1477
1593
|
stageDetails.push({
|
|
1478
1594
|
stage: "complexity_classification",
|
|
1479
|
-
provider:
|
|
1595
|
+
provider: this.provider.label,
|
|
1480
1596
|
latency_ms: complexityResponse.latencyMs,
|
|
1481
1597
|
token_usage: {
|
|
1482
1598
|
input_tokens: complexityResponse.usage.inputTokens,
|
|
@@ -1492,8 +1608,10 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1492
1608
|
score: complexityResponse.data.answer,
|
|
1493
1609
|
reasoning: complexityResponse.data.reasoning,
|
|
1494
1610
|
metadata: {
|
|
1495
|
-
model:
|
|
1496
|
-
processingTimeMs: latencyMs
|
|
1611
|
+
model: this.provider.label,
|
|
1612
|
+
processingTimeMs: latencyMs,
|
|
1613
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
1614
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
1497
1615
|
},
|
|
1498
1616
|
_internal: {
|
|
1499
1617
|
sentenceAnalysis: analysisResponse.data,
|
|
@@ -1506,7 +1624,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1506
1624
|
latencyMs,
|
|
1507
1625
|
textLength: text.length,
|
|
1508
1626
|
grade,
|
|
1509
|
-
provider:
|
|
1627
|
+
provider: this.provider.label,
|
|
1510
1628
|
tokenUsage: totalTokenUsage,
|
|
1511
1629
|
metadata: {
|
|
1512
1630
|
stage_details: stageDetails
|
|
@@ -1541,7 +1659,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1541
1659
|
latencyMs,
|
|
1542
1660
|
textLength: text.length,
|
|
1543
1661
|
grade,
|
|
1544
|
-
provider:
|
|
1662
|
+
provider: this.provider.label,
|
|
1545
1663
|
tokenUsage: totalTokenUsage,
|
|
1546
1664
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1547
1665
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -1569,7 +1687,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1569
1687
|
`flesch_kincaid_grade: ${metrics.fleschKincaidGrade}`
|
|
1570
1688
|
].join("\n");
|
|
1571
1689
|
const userPrompt = getUserPromptAnalysis(text, gtCountsStr);
|
|
1572
|
-
const response = await this.
|
|
1690
|
+
const response = await this.provider.generateStructured({
|
|
1573
1691
|
messages: [
|
|
1574
1692
|
{ role: "system", content: getSystemPromptAnalysis() },
|
|
1575
1693
|
{ role: "user", content: userPrompt }
|
|
@@ -1591,7 +1709,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1591
1709
|
async classifyComplexity(features, grade, excerpt) {
|
|
1592
1710
|
const featuresJSON = featuresToJSON(features, 1, true);
|
|
1593
1711
|
const userPrompt = getUserPromptComplexity(featuresJSON, grade, excerpt);
|
|
1594
|
-
const response = await this.
|
|
1712
|
+
const response = await this.provider.generateStructured({
|
|
1595
1713
|
messages: [
|
|
1596
1714
|
{ role: "system", content: getSystemPromptComplexity() },
|
|
1597
1715
|
{ role: "user", content: userPrompt }
|
|
@@ -1647,18 +1765,16 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1647
1765
|
description: "Determines appropriate grade level for text with scaffolding recommendations",
|
|
1648
1766
|
supportedGrades: [],
|
|
1649
1767
|
// No grade parameter required - evaluates what grade the text is appropriate for
|
|
1650
|
-
|
|
1651
|
-
requiresOpenAIKey: false
|
|
1768
|
+
defaultProviders: ["google" /* Google */]
|
|
1652
1769
|
};
|
|
1653
1770
|
provider;
|
|
1654
1771
|
constructor(config) {
|
|
1655
1772
|
super(config);
|
|
1656
|
-
this.provider =
|
|
1657
|
-
|
|
1658
|
-
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
});
|
|
1773
|
+
this.provider = this.createConfiguredProvider(
|
|
1774
|
+
"google" /* Google */,
|
|
1775
|
+
"gemini-2.5-pro",
|
|
1776
|
+
config.googleApiKey
|
|
1777
|
+
);
|
|
1662
1778
|
}
|
|
1663
1779
|
/**
|
|
1664
1780
|
* Evaluate grade level appropriateness for a given text
|
|
@@ -1666,6 +1782,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1666
1782
|
* @param text - The text to evaluate
|
|
1667
1783
|
* @returns Evaluation result with grade recommendations and scaffolding suggestions
|
|
1668
1784
|
* @throws {ValidationError} If text is empty or too short/long
|
|
1785
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1669
1786
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1670
1787
|
*/
|
|
1671
1788
|
async evaluate(text) {
|
|
@@ -1699,8 +1816,10 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1699
1816
|
score: response.data.grade,
|
|
1700
1817
|
reasoning: response.data.reasoning,
|
|
1701
1818
|
metadata: {
|
|
1702
|
-
model:
|
|
1703
|
-
processingTimeMs: latencyMs
|
|
1819
|
+
model: this.provider.label,
|
|
1820
|
+
processingTimeMs: latencyMs,
|
|
1821
|
+
inputTokens: tokenUsage.input_tokens,
|
|
1822
|
+
outputTokens: tokenUsage.output_tokens
|
|
1704
1823
|
},
|
|
1705
1824
|
_internal: response.data
|
|
1706
1825
|
};
|
|
@@ -1708,7 +1827,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1708
1827
|
status: "success",
|
|
1709
1828
|
latencyMs,
|
|
1710
1829
|
textLength: text.length,
|
|
1711
|
-
provider:
|
|
1830
|
+
provider: this.provider.label,
|
|
1712
1831
|
tokenUsage,
|
|
1713
1832
|
// No metadata.stage_details for single-stage evaluator
|
|
1714
1833
|
inputText: text
|
|
@@ -1733,7 +1852,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1733
1852
|
status: "error",
|
|
1734
1853
|
latencyMs,
|
|
1735
1854
|
textLength: text.length,
|
|
1736
|
-
provider:
|
|
1855
|
+
provider: this.provider.label,
|
|
1737
1856
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1738
1857
|
inputText: text
|
|
1739
1858
|
}).catch(() => {
|
|
@@ -1844,18 +1963,16 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1844
1963
|
name: "Subject Matter Knowledge",
|
|
1845
1964
|
description: "Evaluates background knowledge demands of educational texts relative to grade level",
|
|
1846
1965
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
1847
|
-
|
|
1848
|
-
requiresOpenAIKey: false
|
|
1966
|
+
defaultProviders: ["google" /* Google */]
|
|
1849
1967
|
};
|
|
1850
1968
|
provider;
|
|
1851
1969
|
constructor(config) {
|
|
1852
1970
|
super(config);
|
|
1853
|
-
this.provider =
|
|
1854
|
-
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
});
|
|
1971
|
+
this.provider = this.createConfiguredProvider(
|
|
1972
|
+
"google" /* Google */,
|
|
1973
|
+
"gemini-3-flash-preview",
|
|
1974
|
+
config.googleApiKey
|
|
1975
|
+
);
|
|
1859
1976
|
}
|
|
1860
1977
|
/**
|
|
1861
1978
|
* Evaluate subject matter knowledge complexity for a given text and grade level
|
|
@@ -1864,6 +1981,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1864
1981
|
* @param grade - The target grade level (3-12)
|
|
1865
1982
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
1866
1983
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
1984
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1867
1985
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1868
1986
|
*/
|
|
1869
1987
|
async evaluate(text, grade) {
|
|
@@ -1886,7 +2004,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1886
2004
|
const response = await this.evaluateSmk(text, grade, fkScore);
|
|
1887
2005
|
stageDetails.push({
|
|
1888
2006
|
stage: "smk_evaluation",
|
|
1889
|
-
provider:
|
|
2007
|
+
provider: this.provider.label,
|
|
1890
2008
|
latency_ms: response.latencyMs,
|
|
1891
2009
|
token_usage: {
|
|
1892
2010
|
input_tokens: response.usage.inputTokens,
|
|
@@ -1902,8 +2020,10 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1902
2020
|
score: response.data.complexity_score,
|
|
1903
2021
|
reasoning: response.data.reasoning,
|
|
1904
2022
|
metadata: {
|
|
1905
|
-
model:
|
|
1906
|
-
processingTimeMs: latencyMs
|
|
2023
|
+
model: this.provider.label,
|
|
2024
|
+
processingTimeMs: latencyMs,
|
|
2025
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
2026
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
1907
2027
|
},
|
|
1908
2028
|
_internal: response.data
|
|
1909
2029
|
};
|
|
@@ -1912,7 +2032,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1912
2032
|
latencyMs,
|
|
1913
2033
|
textLength: text.length,
|
|
1914
2034
|
grade,
|
|
1915
|
-
provider:
|
|
2035
|
+
provider: this.provider.label,
|
|
1916
2036
|
tokenUsage: totalTokenUsage,
|
|
1917
2037
|
metadata: {
|
|
1918
2038
|
stage_details: stageDetails
|
|
@@ -1947,7 +2067,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1947
2067
|
latencyMs,
|
|
1948
2068
|
textLength: text.length,
|
|
1949
2069
|
grade,
|
|
1950
|
-
provider:
|
|
2070
|
+
provider: this.provider.label,
|
|
1951
2071
|
tokenUsage: totalTokenUsage,
|
|
1952
2072
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1953
2073
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -2051,18 +2171,16 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2051
2171
|
name: "Conventionality",
|
|
2052
2172
|
description: "Evaluates how explicit, literal, and straightforward a text's meaning is relative to grade level",
|
|
2053
2173
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
2054
|
-
|
|
2055
|
-
requiresOpenAIKey: false
|
|
2174
|
+
defaultProviders: ["google" /* Google */]
|
|
2056
2175
|
};
|
|
2057
2176
|
provider;
|
|
2058
2177
|
constructor(config) {
|
|
2059
2178
|
super(config);
|
|
2060
|
-
this.provider =
|
|
2061
|
-
|
|
2062
|
-
|
|
2063
|
-
|
|
2064
|
-
|
|
2065
|
-
});
|
|
2179
|
+
this.provider = this.createConfiguredProvider(
|
|
2180
|
+
"google" /* Google */,
|
|
2181
|
+
"gemini-3-flash-preview",
|
|
2182
|
+
config.googleApiKey
|
|
2183
|
+
);
|
|
2066
2184
|
}
|
|
2067
2185
|
/**
|
|
2068
2186
|
* Evaluate conventionality complexity for a given text and grade level
|
|
@@ -2071,6 +2189,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2071
2189
|
* @param grade - The target grade level (3-12)
|
|
2072
2190
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
2073
2191
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
2192
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
2074
2193
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
2075
2194
|
*/
|
|
2076
2195
|
async evaluate(text, grade) {
|
|
@@ -2093,7 +2212,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2093
2212
|
const response = await this.evaluateConventionality(text, grade, fkScore);
|
|
2094
2213
|
stageDetails.push({
|
|
2095
2214
|
stage: "conventionality_evaluation",
|
|
2096
|
-
provider:
|
|
2215
|
+
provider: this.provider.label,
|
|
2097
2216
|
latency_ms: response.latencyMs,
|
|
2098
2217
|
token_usage: {
|
|
2099
2218
|
input_tokens: response.usage.inputTokens,
|
|
@@ -2109,8 +2228,10 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2109
2228
|
score: response.data.complexity_score,
|
|
2110
2229
|
reasoning: response.data.reasoning,
|
|
2111
2230
|
metadata: {
|
|
2112
|
-
model:
|
|
2113
|
-
processingTimeMs: latencyMs
|
|
2231
|
+
model: this.provider.label,
|
|
2232
|
+
processingTimeMs: latencyMs,
|
|
2233
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
2234
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
2114
2235
|
},
|
|
2115
2236
|
_internal: response.data
|
|
2116
2237
|
};
|
|
@@ -2119,7 +2240,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2119
2240
|
latencyMs,
|
|
2120
2241
|
textLength: text.length,
|
|
2121
2242
|
grade,
|
|
2122
|
-
provider:
|
|
2243
|
+
provider: this.provider.label,
|
|
2123
2244
|
tokenUsage: totalTokenUsage,
|
|
2124
2245
|
metadata: {
|
|
2125
2246
|
stage_details: stageDetails
|
|
@@ -2154,7 +2275,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2154
2275
|
latencyMs,
|
|
2155
2276
|
textLength: text.length,
|
|
2156
2277
|
grade,
|
|
2157
|
-
provider:
|
|
2278
|
+
provider: this.provider.label,
|
|
2158
2279
|
tokenUsage: totalTokenUsage,
|
|
2159
2280
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
2160
2281
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -2186,6 +2307,278 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2186
2307
|
};
|
|
2187
2308
|
}
|
|
2188
2309
|
};
|
|
2310
|
+
var PurposeOutputSchema = z.object({ "complexity_score": z.enum(["slightly_complex", "moderately_complex", "very_complex", "exceedingly_complex", "more_context_needed"]).describe("The Purpose complexity level for the target grade."), "reasoning": z.string().describe("A high-level summary of why the text is at this complexity level for the target grade."), "details": z.object({ "detailed_summary": z.array(z.object({ "factor": z.string().describe("The specific text complexity factor identified."), "description": z.string().describe("How this factor manifests in the text."), "effect_on_complexity_dimension": z.string().describe("How this factor affects the reader's ability to understand the text's specific complexity dimension.") }).strict()).describe("Individual complexity factors with descriptions and their effects."), "adjustment_and_scaffolding": z.array(z.object({ "scaffolding_need": z.string().describe("The complexity factor that requires scaffolding."), "suggestion": z.string().describe("A specific instructional strategy to support students with this factor.") }).strict()).describe("Scaffolding strategies to make the text accessible at the target grade."), "recommended_use_cases": z.array(z.object({ "opportunity": z.string().describe("An instructional opportunity related to the text."), "suggestion": z.string().describe("A specific way to leverage this text for that instructional purpose.") }).strict()).describe("Additional instructional opportunities for using this text.") }).strict().describe("Practical instructional details including scaffolding strategies and recommended use cases.") }).strict();
|
|
2311
|
+
|
|
2312
|
+
// ../../evals/prompts/purpose/system.txt
|
|
2313
|
+
var system_default4 = '\n Role\n You are an expert reading assessment evaluator. Your task is to determine the Text Complexity of a given passage based exclusively on the Purpose dimension of the qualitative measures rubric.\n\n Task Details\n You will be provided with an informational or literary `text`, along with its `grade_level` and `fk_score` (Flesch-Kincaid). You must analyze the text and determine how difficult it is for a reader to identify the author\'s purpose. \n\n Crucially, you must distinguish between the text\'s *topic* (what it is about) and its *purpose* (why the author wrote it). \n\n Rubric: Purpose Complexity\n Exceedingly Complex: Subtle and intricate, difficult to determine; includes many theoretical or abstract elements.\n Very Complex: Implicit or subtle but fairly easy to infer; more theoretical or abstract than concrete.\n Moderately Complex: Implied but easy to identify based upon context or source.\n Slightly Complex: Explicitly stated, clear, concrete, narrowly focused.\n More Context Needed: The text is a fragment or lacks necessary introductory context, making the true purpose impossible to determine accurately without external background knowledge.\n\n Expert Rules for Evaluating Purpose\n Based on expert consensus and historical grading corrections, you must apply the following heuristics:\n\n 1. The "Slightly Complex" Benchmark (Straightforward and Explicit)\n A text is Slightly Complex if its purpose is explicitly stated or if its informative intent is straightforward, clear, concrete, and directly answers what the text is immediately about. If the text opens by clearly identifying a concrete topic (e.g., "Pins are made of either brass or iron wire") and rigidly follows through by explaining factual, practical information or a process (like manufacturing steps or geographic facts), the purpose is considered explicit and straightforward. It does *not* require a literal statement like "The purpose of this text is to..." 
as long as the delivery of information is direct, clear, and unadorned by persuasive elements or complex framing.\n\n 2. Moderately Complex via Guiding Questions & Inquiry Formats\n If a text begins with a general introduction and uses guiding questions (e.g., "Have you ever wondered how clouds are formed?") to transition into an explanation, the purpose is implied rather than explicitly stated upfront. Because the reader must recognize the question as the pivot point for the author\'s intent, it is Moderately Complex.\n\n 3. Moderately Complex via Multiple Distinct Informational Goals\n If a text covers a broad topic but jumps between several distinct scientific or informational objectives without an overarching framing device or explicit thesis (e.g., talking about measuring ice sheets, then mapping, then finding meteorites), the reader must synthesize these diverse facts to recognize the broader purpose, making it Moderately Complex.\n\n 4. Moderately Complex via Arguments Disguised as Information\n If an author is arguing a specific point, correcting a misconception, or defending a stance, but the text could initially be mistaken by students as purely informative factual text, it is Moderately Complex. The reader must infer the persuasive intent or argumentative purpose beneath the informative tone.\n\n 5. "More Context Needed" for Fragments\n If a text is a fragment missing a crucial introduction or context, and identifying the author\'s purpose beyond a simple surface-level description would be exceptionally difficult for a reader in the target grade level without that external background, score it as `more_context_needed`. 
\n\n Output Format\n Provide your evaluation in the following structure:\n reasoning:\n - Surface Analysis: Identify if the text clearly identifies its topic and delivers straightforward facts, or if it utilizes structural cues, titles, or direct thesis statements.\n - Subtlety & Framing: Is the informative purpose straightforward and concrete? Does it use guiding questions? Is it an argument disguised as pure information? Are there multiple distinct informational goals requiring synthesis?\n - Context Check: Is this text a fragment missing crucial context that obscures the deeper purpose for the target grade level?\n - Rubric Alignment: Explain how the text aligns with the specific language of the rubric, explicitly referencing the expert rules above. Justify why it isn\'t one level simpler or more complex.\n\n answer:\n - complexity_score: (slightly_complex, moderately_complex, very_complex, exceedingly_complex, more_context_needed)\n - reasoning: A brief summary of your final decision.\n - details: Structured breakdown of PurposeDetails including detailed_summary, adjustment_and_scaffolding, and recommended_use_cases.\n';
|
|
2314
|
+
|
|
2315
|
+
// ../../evals/prompts/purpose/user.txt
|
|
2316
|
+
var user_default4 = "Analyze:\nText: {text}\nGrade: {grade_level}\nFK Score: {fk_score}";
|
|
2317
|
+
|
|
2318
|
+
// ../../evals/prompts/purpose/config.json
|
|
2319
|
+
var config_default = {
|
|
2320
|
+
evaluator: {
|
|
2321
|
+
id: "literacy.gla.purpose",
|
|
2322
|
+
name: "Purpose Dimension Text Complexity Evaluator",
|
|
2323
|
+
description: "Evaluates the Purpose dimension of qualitative text complexity for K-12 reading assessment, producing a 5-level rubric rating with structured pedagogical detail."
|
|
2324
|
+
},
|
|
2325
|
+
preprocessing: [
|
|
2326
|
+
{
|
|
2327
|
+
id: "fk_score",
|
|
2328
|
+
kind: "flesch_kincaid_grade",
|
|
2329
|
+
description: "Compute the Flesch-Kincaid Grade Level for the input text and bind it to {fk_score} in the prompt.",
|
|
2330
|
+
input: "text",
|
|
2331
|
+
output: "fk_score",
|
|
2332
|
+
implementation: {
|
|
2333
|
+
python: {
|
|
2334
|
+
library: "textstat",
|
|
2335
|
+
function: "flesch_kincaid_grade",
|
|
2336
|
+
post_transform: {
|
|
2337
|
+
type: "round",
|
|
2338
|
+
precision: 2
|
|
2339
|
+
}
|
|
2340
|
+
},
|
|
2341
|
+
typescript: {
|
|
2342
|
+
library: "text-readability",
|
|
2343
|
+
function: "fleschKincaidGrade",
|
|
2344
|
+
post_transform: {
|
|
2345
|
+
type: "round",
|
|
2346
|
+
precision: 2
|
|
2347
|
+
}
|
|
2348
|
+
}
|
|
2349
|
+
}
|
|
2350
|
+
}
|
|
2351
|
+
],
|
|
2352
|
+
steps: [
|
|
2353
|
+
{
|
|
2354
|
+
id: "evaluate_purpose",
|
|
2355
|
+
description: "Single-call LLM step that produces the EvaluatorOutput JSON.",
|
|
2356
|
+
prompt: {
|
|
2357
|
+
type: "chat",
|
|
2358
|
+
messages: [
|
|
2359
|
+
{
|
|
2360
|
+
role: "system",
|
|
2361
|
+
source_path: "system.txt",
|
|
2362
|
+
sha256: "745b95b7d54dc845b99363c9d3360355381883c22a5f6a0f305d7349cae38a54"
|
|
2363
|
+
},
|
|
2364
|
+
{
|
|
2365
|
+
role: "user",
|
|
2366
|
+
source_path: "user.txt",
|
|
2367
|
+
sha256: "cd8e6347db1a55d104e34436f8f66e833bd6583645d4786a554aaefdd26479b2"
|
|
2368
|
+
}
|
|
2369
|
+
],
|
|
2370
|
+
placeholders: {
|
|
2371
|
+
text: {
|
|
2372
|
+
required: true,
|
|
2373
|
+
source: "input"
|
|
2374
|
+
},
|
|
2375
|
+
grade_level: {
|
|
2376
|
+
required: true,
|
|
2377
|
+
source: "input"
|
|
2378
|
+
},
|
|
2379
|
+
fk_score: {
|
|
2380
|
+
required: true,
|
|
2381
|
+
source: "preprocessing.fk_score"
|
|
2382
|
+
}
|
|
2383
|
+
}
|
|
2384
|
+
},
|
|
2385
|
+
model: {
|
|
2386
|
+
provider: "google",
|
|
2387
|
+
name: "gemini-3-flash-preview"
|
|
2388
|
+
},
|
|
2389
|
+
generation: {
|
|
2390
|
+
temperature: 0
|
|
2391
|
+
},
|
|
2392
|
+
parser: {
|
|
2393
|
+
kind: "structured_output"
|
|
2394
|
+
},
|
|
2395
|
+
output_binding: "formatted_output"
|
|
2396
|
+
}
|
|
2397
|
+
]};
|
|
2398
|
+
|
|
2399
|
+
// src/prompts/purpose/index.ts
|
|
2400
|
+
var STEP_ID = `evaluate_${config_default.evaluator.id.split(".").pop()}`;
|
|
2401
|
+
var _step = config_default.steps.find((s) => s.id === STEP_ID);
|
|
2402
|
+
if (!_step) throw new Error(`Step "${STEP_ID}" not found in purpose config.json`);
|
|
2403
|
+
var PLACEHOLDER_KEYS = Object.keys(_step.prompt.placeholders);
|
|
2404
|
+
function applyPlaceholders(template, inputs) {
|
|
2405
|
+
return PLACEHOLDER_KEYS.reduce(
|
|
2406
|
+
(text, key) => key in inputs ? text.replaceAll(`{${key}}`, inputs[key]) : text,
|
|
2407
|
+
template
|
|
2408
|
+
);
|
|
2409
|
+
}
|
|
2410
|
+
function getSystemPrompt5(inputs) {
|
|
2411
|
+
return applyPlaceholders(system_default4, inputs);
|
|
2412
|
+
}
|
|
2413
|
+
function getUserPrompt5(inputs) {
|
|
2414
|
+
return applyPlaceholders(user_default4, inputs);
|
|
2415
|
+
}
|
|
2416
|
+
|
|
2417
|
+
// ../../evals/prompts/purpose/input_schema.json
|
|
2418
|
+
var input_schema_default = {
|
|
2419
|
+
properties: {
|
|
2420
|
+
grade_level: {
|
|
2421
|
+
minimum: 3,
|
|
2422
|
+
maximum: 12}
|
|
2423
|
+
}
|
|
2424
|
+
};
|
|
2425
|
+
|
|
2426
|
+
// src/evaluators/purpose.ts
|
|
2427
|
+
var STEP_ID2 = `evaluate_${config_default.evaluator.id.split(".").pop()}`;
|
|
2428
|
+
var _step2 = config_default.steps.find((s) => s.id === STEP_ID2);
|
|
2429
|
+
if (!_step2) throw new Error(`Step "${STEP_ID2}" not found in purpose config.json`);
|
|
2430
|
+
var STEP = _step2;
|
|
2431
|
+
var GRADE_MIN = input_schema_default.properties.grade_level.minimum;
|
|
2432
|
+
var GRADE_MAX = input_schema_default.properties.grade_level.maximum;
|
|
2433
|
+
var SUPPORTED_GRADES = Array.from({ length: GRADE_MAX - GRADE_MIN + 1 }, (_, i) => String(GRADE_MIN + i));
|
|
2434
|
+
var COMPLEXITY_SCORE_DISPLAY = {
|
|
2435
|
+
"slightly_complex": "Slightly complex",
|
|
2436
|
+
"moderately_complex": "Moderately complex",
|
|
2437
|
+
"very_complex": "Very complex",
|
|
2438
|
+
"exceedingly_complex": "Exceedingly complex",
|
|
2439
|
+
"more_context_needed": "More context needed"
|
|
2440
|
+
};
|
|
2441
|
+
var PurposeEvaluator = class _PurposeEvaluator extends BaseEvaluator {
|
|
2442
|
+
static metadata = {
|
|
2443
|
+
id: config_default.evaluator.id,
|
|
2444
|
+
name: config_default.evaluator.name,
|
|
2445
|
+
description: config_default.evaluator.description,
|
|
2446
|
+
supportedGrades: SUPPORTED_GRADES,
|
|
2447
|
+
defaultProviders: ["google" /* Google */]
|
|
2448
|
+
};
|
|
2449
|
+
static TEMPERATURE = STEP.generation.temperature;
|
|
2450
|
+
static computeFkScore(text) {
|
|
2451
|
+
const fkStep = config_default.preprocessing.find((p) => p.id === "fk_score");
|
|
2452
|
+
if (!fkStep) throw new Error("fk_score preprocessing step not found in purpose config.json");
|
|
2453
|
+
return runPreprocessingStep(text, fkStep.implementation.typescript);
|
|
2454
|
+
}
|
|
2455
|
+
provider;
|
|
2456
|
+
constructor(config) {
|
|
2457
|
+
super(config);
|
|
2458
|
+
this.provider = this.createConfiguredProvider(
|
|
2459
|
+
"google" /* Google */,
|
|
2460
|
+
STEP.model.name,
|
|
2461
|
+
config.googleApiKey
|
|
2462
|
+
);
|
|
2463
|
+
}
|
|
2464
|
+
/**
|
|
2465
|
+
* Evaluate purpose complexity for a given text and grade level
|
|
2466
|
+
*
|
|
2467
|
+
* @param text - The text to evaluate
|
|
2468
|
+
* @param grade - The target grade level (3-12)
|
|
2469
|
+
* @returns Evaluation result with complexity score and detailed analysis
|
|
2470
|
+
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
2471
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
2472
|
+
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
2473
|
+
*/
|
|
2474
|
+
async evaluate(text, grade) {
|
|
2475
|
+
this.logger.info("Starting Purpose evaluation", {
|
|
2476
|
+
evaluator: _PurposeEvaluator.metadata.id,
|
|
2477
|
+
operation: "evaluate",
|
|
2478
|
+
grade,
|
|
2479
|
+
textLength: text.length
|
|
2480
|
+
});
|
|
2481
|
+
const startTime = Date.now();
|
|
2482
|
+
const stageDetails = [];
|
|
2483
|
+
try {
|
|
2484
|
+
this.validateText(text);
|
|
2485
|
+
const gradeNum = this.parseAndValidateGrade(grade);
|
|
2486
|
+
const fkScore = _PurposeEvaluator.computeFkScore(text);
|
|
2487
|
+
const inputs = {
|
|
2488
|
+
text,
|
|
2489
|
+
grade_level: String(gradeNum),
|
|
2490
|
+
fk_score: String(fkScore)
|
|
2491
|
+
};
|
|
2492
|
+
const response = await this.callLLM(inputs);
|
|
2493
|
+
const latencyMs = Date.now() - startTime;
|
|
2494
|
+
const tokenUsage = {
|
|
2495
|
+
input_tokens: response.usage.inputTokens,
|
|
2496
|
+
output_tokens: response.usage.outputTokens
|
|
2497
|
+
};
|
|
2498
|
+
stageDetails.push({
|
|
2499
|
+
stage: STEP.id,
|
|
2500
|
+
provider: this.provider.label,
|
|
2501
|
+
latency_ms: response.latencyMs,
|
|
2502
|
+
token_usage: tokenUsage
|
|
2503
|
+
});
|
|
2504
|
+
const result = {
|
|
2505
|
+
score: COMPLEXITY_SCORE_DISPLAY[response.data.complexity_score],
|
|
2506
|
+
reasoning: response.data.reasoning,
|
|
2507
|
+
metadata: {
|
|
2508
|
+
model: this.provider.label,
|
|
2509
|
+
processingTimeMs: latencyMs,
|
|
2510
|
+
inputTokens: tokenUsage.input_tokens,
|
|
2511
|
+
outputTokens: tokenUsage.output_tokens
|
|
2512
|
+
},
|
|
2513
|
+
_internal: response.data
|
|
2514
|
+
};
|
|
2515
|
+
this.sendTelemetry({
|
|
2516
|
+
status: "success",
|
|
2517
|
+
latencyMs,
|
|
2518
|
+
textLength: text.length,
|
|
2519
|
+
grade: String(gradeNum),
|
|
2520
|
+
provider: this.provider.label,
|
|
2521
|
+
tokenUsage,
|
|
2522
|
+
metadata: { stage_details: stageDetails },
|
|
2523
|
+
inputText: text
|
|
2524
|
+
}).catch(() => void 0);
|
|
2525
|
+
this.logger.info("Purpose evaluation completed successfully", {
|
|
2526
|
+
evaluator: _PurposeEvaluator.metadata.id,
|
|
2527
|
+
operation: "evaluate",
|
|
2528
|
+
grade: gradeNum,
|
|
2529
|
+
score: result.score,
|
|
2530
|
+
processingTimeMs: latencyMs
|
|
2531
|
+
});
|
|
2532
|
+
return result;
|
|
2533
|
+
} catch (error) {
|
|
2534
|
+
const latencyMs = Date.now() - startTime;
|
|
2535
|
+
this.logger.error("Purpose evaluation failed", {
|
|
2536
|
+
evaluator: _PurposeEvaluator.metadata.id,
|
|
2537
|
+
operation: "evaluate",
|
|
2538
|
+
grade,
|
|
2539
|
+
error: error instanceof Error ? error : void 0,
|
|
2540
|
+
processingTimeMs: latencyMs
|
|
2541
|
+
});
|
|
2542
|
+
const tokenUsage = stageDetails.length > 0 ? {
|
|
2543
|
+
input_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.input_tokens ?? 0), 0),
|
|
2544
|
+
output_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.output_tokens ?? 0), 0)
|
|
2545
|
+
} : void 0;
|
|
2546
|
+
this.sendTelemetry({
|
|
2547
|
+
status: "error",
|
|
2548
|
+
latencyMs,
|
|
2549
|
+
textLength: text.length,
|
|
2550
|
+
grade: String(grade),
|
|
2551
|
+
provider: this.provider.label,
|
|
2552
|
+
tokenUsage,
|
|
2553
|
+
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
2554
|
+
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
2555
|
+
inputText: text
|
|
2556
|
+
}).catch(() => void 0);
|
|
2557
|
+
if (error instanceof ValidationError) throw error;
|
|
2558
|
+
throw wrapProviderError(error, "Purpose evaluation failed");
|
|
2559
|
+
}
|
|
2560
|
+
}
|
|
2561
|
+
parseAndValidateGrade(grade) {
|
|
2562
|
+
const num = Number(grade.trim());
|
|
2563
|
+
if (!Number.isInteger(num) || num < GRADE_MIN || num > GRADE_MAX) {
|
|
2564
|
+
throw new ValidationError(
|
|
2565
|
+
`Invalid grade "${grade}". Purpose evaluator supports integer grades ${GRADE_MIN}\u2013${GRADE_MAX}.`
|
|
2566
|
+
);
|
|
2567
|
+
}
|
|
2568
|
+
return num;
|
|
2569
|
+
}
|
|
2570
|
+
async callLLM(inputs) {
|
|
2571
|
+
const response = await this.provider.generateStructured({
|
|
2572
|
+
messages: [
|
|
2573
|
+
{ role: "system", content: getSystemPrompt5(inputs) },
|
|
2574
|
+
{ role: "user", content: getUserPrompt5(inputs) }
|
|
2575
|
+
],
|
|
2576
|
+
schema: PurposeOutputSchema,
|
|
2577
|
+
temperature: _PurposeEvaluator.TEMPERATURE
|
|
2578
|
+
});
|
|
2579
|
+
return { data: response.data, usage: response.usage, latencyMs: response.latencyMs };
|
|
2580
|
+
}
|
|
2581
|
+
};
|
|
2189
2582
|
|
|
2190
2583
|
// src/batch/evaluator.ts
|
|
2191
2584
|
var EVALUATOR_MAP = /* @__PURE__ */ new Map([
|
|
@@ -2193,19 +2586,21 @@ var EVALUATOR_MAP = /* @__PURE__ */ new Map([
|
|
|
2193
2586
|
[SmkEvaluator.metadata.id, SmkEvaluator],
|
|
2194
2587
|
[VocabularyEvaluator.metadata.id, VocabularyEvaluator],
|
|
2195
2588
|
[SentenceStructureEvaluator.metadata.id, SentenceStructureEvaluator],
|
|
2196
|
-
[ConventionalityEvaluator.metadata.id, ConventionalityEvaluator]
|
|
2589
|
+
[ConventionalityEvaluator.metadata.id, ConventionalityEvaluator],
|
|
2590
|
+
[PurposeEvaluator.metadata.id, PurposeEvaluator]
|
|
2197
2591
|
]);
|
|
2198
2592
|
var EVALUATOR_GROUPS = [
|
|
2199
2593
|
{
|
|
2200
2594
|
id: "text-complexity",
|
|
2201
2595
|
name: "Text Complexity Analysis",
|
|
2202
|
-
description: "Evaluates
|
|
2596
|
+
description: "Evaluates all dimensions of the Qualitative Text Complexity rubric",
|
|
2203
2597
|
evaluatorIds: [
|
|
2204
2598
|
GradeLevelAppropriatenessEvaluator.metadata.id,
|
|
2205
2599
|
SmkEvaluator.metadata.id,
|
|
2206
2600
|
VocabularyEvaluator.metadata.id,
|
|
2207
2601
|
SentenceStructureEvaluator.metadata.id,
|
|
2208
|
-
ConventionalityEvaluator.metadata.id
|
|
2602
|
+
ConventionalityEvaluator.metadata.id,
|
|
2603
|
+
PurposeEvaluator.metadata.id
|
|
2209
2604
|
],
|
|
2210
2605
|
requiresGoogleKey: true,
|
|
2211
2606
|
requiresOpenAIKey: true,
|
|
@@ -2226,6 +2621,7 @@ var BatchEvaluator = class {
|
|
|
2226
2621
|
concurrency: 3,
|
|
2227
2622
|
maxRetries: 2,
|
|
2228
2623
|
telemetry: false,
|
|
2624
|
+
bypassRowLimit: false,
|
|
2229
2625
|
...config
|
|
2230
2626
|
};
|
|
2231
2627
|
this.limit = pLimit(this.config.concurrency);
|
|
@@ -2379,9 +2775,9 @@ var BatchEvaluator = class {
|
|
|
2379
2775
|
`Unknown evaluator group: "${groupId}". Available: ${EVALUATOR_GROUPS.map((g) => g.id).join(", ")}`
|
|
2380
2776
|
);
|
|
2381
2777
|
}
|
|
2382
|
-
if (inputs.length > group.maxInputRows) {
|
|
2778
|
+
if (!this.config.bypassRowLimit && inputs.length > group.maxInputRows) {
|
|
2383
2779
|
throw new Error(
|
|
2384
|
-
`Input exceeds limit for "${group.id}": ${inputs.length} rows (max ${group.maxInputRows}). Split into smaller batches.`
|
|
2780
|
+
`Input exceeds limit for "${group.id}": ${inputs.length} rows (max ${group.maxInputRows}). Split into smaller batches, or pass { bypassRowLimit: true } in BatchConfig to bypass (use --bypass-row-limit on the CLI).`
|
|
2385
2781
|
);
|
|
2386
2782
|
}
|
|
2387
2783
|
this.isCancelled = false;
|
|
@@ -3296,6 +3692,8 @@ var COMPLEXITY_SCORE_MAP = {
|
|
|
3296
3692
|
"moderately complex": 2,
|
|
3297
3693
|
"very complex": 3,
|
|
3298
3694
|
"exceedingly complex": 4
|
|
3695
|
+
// 'more context needed' has no numeric equivalent — rows with this score appear as N/A
|
|
3696
|
+
// in individual results and are excluded from aggregate stats, same as failed evaluations.
|
|
3299
3697
|
};
|
|
3300
3698
|
function evaluatorDisplayName(id) {
|
|
3301
3699
|
return id.split("-").map((w) => w.charAt(0).toUpperCase() + w.slice(1)).join(" ");
|
|
@@ -3350,7 +3748,8 @@ function groupResultsByRow(results) {
|
|
|
3350
3748
|
return grouped;
|
|
3351
3749
|
}
|
|
3352
3750
|
function formatEvaluatorPrefix(evaluatorId) {
|
|
3353
|
-
|
|
3751
|
+
const slug = evaluatorId.includes(".") ? evaluatorId.split(".").pop() : evaluatorId;
|
|
3752
|
+
return slug.replace(/-/g, "_");
|
|
3354
3753
|
}
|
|
3355
3754
|
function escapeCSV(field) {
|
|
3356
3755
|
if (field.includes(",") || field.includes('"') || field.includes("\n")) {
|