@learning-commons/evaluators 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/README.md +188 -45
- package/dist/{base-Ced9oKKa.d.cts → base-DKcAYXfb.d.cts} +142 -9
- package/dist/{base-Ced9oKKa.d.ts → base-DKcAYXfb.d.ts} +142 -9
- package/dist/batch/cli.js +635 -227
- package/dist/batch/cli.js.map +1 -1
- package/dist/batch/index.cjs +618 -218
- package/dist/batch/index.cjs.map +1 -1
- package/dist/batch/index.d.cts +3 -1
- package/dist/batch/index.d.ts +3 -1
- package/dist/batch/index.js +617 -218
- package/dist/batch/index.js.map +1 -1
- package/dist/index.cjs +626 -217
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +155 -86
- package/dist/index.d.ts +155 -86
- package/dist/index.js +622 -218
- package/dist/index.js.map +1 -1
- package/package.json +13 -4
- package/src/batch/README.md +14 -1
package/dist/batch/cli.js
CHANGED
|
@@ -13,6 +13,7 @@ import { generateText, Output } from 'ai';
|
|
|
13
13
|
import { z } from 'zod';
|
|
14
14
|
import nlp from 'compromise';
|
|
15
15
|
import { syllable } from 'syllable';
|
|
16
|
+
import textReadability from 'text-readability';
|
|
16
17
|
import { parse } from 'csv-parse/sync';
|
|
17
18
|
|
|
18
19
|
// src/telemetry/client.ts
|
|
@@ -174,8 +175,9 @@ var TimeoutError = class extends APIError {
|
|
|
174
175
|
function parseProviderError(error) {
|
|
175
176
|
if (error instanceof Error) {
|
|
176
177
|
const message = error.message;
|
|
178
|
+
const err = error;
|
|
177
179
|
const statusMatch = message.match(/\b(4\d{2}|5\d{2})\b/);
|
|
178
|
-
const statusCode = statusMatch ? parseInt(statusMatch[1]) : void 0;
|
|
180
|
+
const statusCode = err.statusCode ?? err.status ?? (statusMatch ? parseInt(statusMatch[1]) : void 0);
|
|
179
181
|
return {
|
|
180
182
|
message,
|
|
181
183
|
statusCode,
|
|
@@ -188,6 +190,11 @@ function parseProviderError(error) {
|
|
|
188
190
|
}
|
|
189
191
|
function wrapProviderError(error, defaultMessage = "API request failed") {
|
|
190
192
|
const { message, statusCode, code } = parseProviderError(error);
|
|
193
|
+
if (statusCode === 404 || statusCode === 400 && /\bmodel\b.*(not found|does not exist|invalid)/i.test(message)) {
|
|
194
|
+
return new ConfigurationError(
|
|
195
|
+
`Model not found or invalid: ${message}. Check the model ID passed to the provider.`
|
|
196
|
+
);
|
|
197
|
+
}
|
|
191
198
|
if (statusCode === 401 || statusCode === 403) {
|
|
192
199
|
return new AuthenticationError(
|
|
193
200
|
message.includes("API key") ? message : "Invalid API key",
|
|
@@ -262,6 +269,111 @@ function createLogger(customLogger, level = 2 /* WARN */) {
|
|
|
262
269
|
}
|
|
263
270
|
return new ConsoleLogger(level);
|
|
264
271
|
}
|
|
272
|
+
/**
 * LLM provider implemented on top of the Vercel AI SDK (`generateText` / `Output` from "ai").
 *
 * One instance is bound to a single provider type and a single model ID, both taken from
 * the config passed to the constructor. Provider adapter packages are loaded lazily, so
 * only the adapter for the configured type has to be installed.
 */
var VercelAIProvider = class {
  /**
   * @param config - Provider configuration. This class reads `type`, `model`, `apiKey`,
   *   `maxRetries` and `temperature`.
   * @throws {Error} If `config.type` is "custom", or if `config.model` is missing,
   *   not a string, or blank.
   */
  constructor(config) {
    this.config = config;
    if (config.type === "custom") {
      throw new Error(
        "VercelAIProvider does not support custom type. Use config.customProvider directly."
      );
    }
    // typeof guard: a non-string model has no .trim() and would otherwise surface as a TypeError.
    if (typeof config.model !== "string" || config.model.trim() === "") {
      throw new Error(
        `model is required for VercelAIProvider (type: "${config.type}"). No default is assumed.`
      );
    }
    this.model = config.model;
    this.label = `${config.type}:${config.model}`;
  }
  /** Human-readable identifier in the form "<type>:<model>". */
  label;
  /** Model ID this provider was constructed with. */
  model;
  /**
   * Generate structured output using Vercel AI SDK's generateText with output.
   *
   * @param request - Reads `messages`, `schema`, and the optional `temperature` and `maxTokens`.
   * @returns `{ data, model, usage: { inputTokens, outputTokens }, latencyMs }`
   */
  async generateStructured(request) {
    const model = await this.getModel();
    const startTime = Date.now();
    const { output, usage } = await generateText({
      model,
      messages: request.messages,
      output: Output.object({ schema: request.schema }),
      // Same fallback chain as generateText() below, so config.temperature is honoured on both paths.
      temperature: request.temperature ?? this.config.temperature ?? 0,
      maxRetries: this.config.maxRetries ?? 0,
      // The SDK generation that reports usage.inputTokens/outputTokens names this setting
      // maxOutputTokens; a `maxTokens` key would be ignored and the limit never applied.
      ...(request.maxTokens !== void 0 ? { maxOutputTokens: request.maxTokens } : {})
    });
    return {
      data: output,
      model: this.model,
      usage: {
        inputTokens: usage?.inputTokens ?? 0,
        outputTokens: usage?.outputTokens ?? 0
      },
      latencyMs: Date.now() - startTime
    };
  }
  /**
   * Generate plain text using Vercel AI SDK's generateText.
   *
   * @param messages - Chat messages forwarded to the SDK unchanged.
   * @param temperature - Optional; falls back to `config.temperature`, then 0.
   * @returns `{ text, usage: { inputTokens, outputTokens }, latencyMs }`
   */
  async generateText(messages, temperature) {
    const model = await this.getModel();
    const startTime = Date.now();
    // Inside a method body the bare name resolves to the module-level SDK import, not this method.
    const { text, usage } = await generateText({
      model,
      messages,
      temperature: temperature ?? this.config.temperature ?? 0,
      maxRetries: this.config.maxRetries ?? 0
    });
    return {
      text,
      usage: {
        inputTokens: usage?.inputTokens ?? 0,
        outputTokens: usage?.outputTokens ?? 0
      },
      latencyMs: Date.now() - startTime
    };
  }
  /**
   * Get the configured language model.
   * Uses dynamic imports so consumers only need to install the provider packages they use.
   *
   * @throws {Error} If the adapter package cannot be imported (the import failure is attached
   *   as `cause`) or if `config.type` is not one of "openai", "anthropic", "google".
   */
  async getModel() {
    const apiKey = this.config.apiKey;
    const settings = apiKey ? { apiKey } : {};
    // Import specifiers stay as string literals so bundlers can still detect the optional adapters.
    switch (this.config.type) {
      case "openai": {
        const { createOpenAI } = await import('@ai-sdk/openai').catch((cause) => {
          throw new Error(
            "To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai",
            { cause }
          );
        });
        return createOpenAI(settings)(this.model);
      }
      case "anthropic": {
        const { createAnthropic } = await import('@ai-sdk/anthropic').catch((cause) => {
          throw new Error(
            "To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic",
            { cause }
          );
        });
        return createAnthropic(settings)(this.model);
      }
      case "google": {
        const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch((cause) => {
          throw new Error(
            "To use the Google provider, install its adapter: npm install @ai-sdk/google",
            { cause }
          );
        });
        return createGoogleGenerativeAI(settings)(this.model);
      }
      default:
        throw new Error(`Unsupported provider type: ${this.config.type}`);
    }
  }
};
|
|
371
|
+
/**
 * Resolve the provider instance for a configuration.
 *
 * A config of type "custom" that carries a `customProvider` yields that object as-is;
 * every other config is handed to the VercelAIProvider constructor.
 */
function createProvider(config) {
  const { type, customProvider } = config;
  const useCustom = type === "custom" && customProvider;
  return useCustom ? customProvider : new VercelAIProvider(config);
}
|
|
265
377
|
|
|
266
378
|
// src/evaluators/base.ts
|
|
267
379
|
var VALIDATION_LIMITS = {
|
|
@@ -270,6 +382,12 @@ var VALIDATION_LIMITS = {
|
|
|
270
382
|
/** Maximum text length in characters (100K chars ≈ 25K tokens) */
|
|
271
383
|
MAX_TEXT_LENGTH: 1e5
|
|
272
384
|
};
|
|
385
|
+
/**
 * Enum-style lookup of the built-in provider identifiers.
 * Insertion order (OpenAI, Google, Anthropic) is the order Object.values() reports.
 */
var Provider = {
  OpenAI: "openai",
  Google: "google",
  Anthropic: "anthropic"
};
|
|
273
391
|
var BaseEvaluator = class {
|
|
274
392
|
telemetryClient;
|
|
275
393
|
logger;
|
|
@@ -287,21 +405,35 @@ var BaseEvaluator = class {
|
|
|
287
405
|
* name: 'My Evaluator',
|
|
288
406
|
* description: 'Does something useful',
|
|
289
407
|
* supportedGrades: ['3', '4', '5'],
|
|
290
|
-
*
|
|
291
|
-
* requiresOpenAIKey: false,
|
|
408
|
+
* defaultProviders: [Provider.Google],
|
|
292
409
|
* };
|
|
293
410
|
* }
|
|
294
411
|
* ```
|
|
295
412
|
*/
|
|
296
413
|
static metadata;
|
|
414
|
+
/**
|
|
415
|
+
* @throws {ConfigurationError} If the subclass has not defined static metadata
|
|
416
|
+
* @throws {ConfigurationError} If modelOverride has an invalid provider or empty model
|
|
417
|
+
* @throws {ConfigurationError} If a required API key is missing
|
|
418
|
+
*/
|
|
297
419
|
constructor(config) {
|
|
298
420
|
this.logger = createLogger(config.logger, config.logLevel ?? 2 /* WARN */);
|
|
421
|
+
this.validateModelOverride(config);
|
|
299
422
|
this.validateApiKeys(config);
|
|
300
423
|
const telemetryConfig = this.normalizeTelemetryConfig(config.telemetry);
|
|
301
424
|
this.config = {
|
|
302
425
|
maxRetries: config.maxRetries ?? 2,
|
|
303
|
-
telemetry: telemetryConfig
|
|
426
|
+
telemetry: telemetryConfig,
|
|
427
|
+
modelOverride: config.modelOverride,
|
|
428
|
+
googleApiKey: config.googleApiKey,
|
|
429
|
+
openaiApiKey: config.openaiApiKey,
|
|
430
|
+
anthropicApiKey: config.anthropicApiKey
|
|
304
431
|
};
|
|
432
|
+
if (config.modelOverride) {
|
|
433
|
+
this.logger.warn(
|
|
434
|
+
`modelOverride is active: using ${config.modelOverride.provider}:${config.modelOverride.model} instead of the default model. Evaluation quality may differ from recommended defaults.`
|
|
435
|
+
);
|
|
436
|
+
}
|
|
305
437
|
if (this.config.telemetry.enabled) {
|
|
306
438
|
this.telemetryClient = new TelemetryClient({
|
|
307
439
|
endpoint: "https://api.learningcommons.org/evaluators-telemetry/v1/events",
|
|
@@ -326,21 +458,62 @@ var BaseEvaluator = class {
|
|
|
326
458
|
return meta;
|
|
327
459
|
}
|
|
328
460
|
/**
|
|
329
|
-
* Validate
|
|
330
|
-
*
|
|
461
|
+
* Validate modelOverride shape: provider must be a known Provider value and
|
|
462
|
+
* model must be a non-empty string.
|
|
463
|
+
* @throws {ConfigurationError} If the override is malformed
|
|
331
464
|
*/
|
|
332
|
-
|
|
333
|
-
if (
|
|
465
|
+
validateModelOverride(config) {
|
|
466
|
+
if (!config.modelOverride) return;
|
|
467
|
+
const validProviders = Object.values(Provider);
|
|
468
|
+
if (!validProviders.includes(config.modelOverride.provider)) {
|
|
334
469
|
throw new ConfigurationError(
|
|
335
|
-
`
|
|
470
|
+
`Invalid provider "${config.modelOverride.provider}" in modelOverride. Valid providers are: ${validProviders.join(", ")}.`
|
|
336
471
|
);
|
|
337
472
|
}
|
|
338
|
-
if (
|
|
473
|
+
if (!config.modelOverride.model || config.modelOverride.model.trim() === "") {
|
|
339
474
|
throw new ConfigurationError(
|
|
340
|
-
`
|
|
475
|
+
`modelOverride.model is required. Specify the model ID for provider "${config.modelOverride.provider}".`
|
|
341
476
|
);
|
|
342
477
|
}
|
|
343
478
|
}
|
|
479
|
+
/**
|
|
480
|
+
* Validate that the required API key is present.
|
|
481
|
+
* When modelOverride is set, checks the override provider's key.
|
|
482
|
+
* Otherwise checks the keys required by the evaluator's default providers.
|
|
483
|
+
* @throws {ConfigurationError} If a required key is missing
|
|
484
|
+
*/
|
|
485
|
+
validateApiKeys(config) {
|
|
486
|
+
const keyFor = {
|
|
487
|
+
["openai" /* OpenAI */]: config.openaiApiKey?.trim() || void 0,
|
|
488
|
+
["google" /* Google */]: config.googleApiKey?.trim() || void 0,
|
|
489
|
+
["anthropic" /* Anthropic */]: config.anthropicApiKey?.trim() || void 0
|
|
490
|
+
};
|
|
491
|
+
const humanName = {
|
|
492
|
+
["openai" /* OpenAI */]: "OpenAI API key",
|
|
493
|
+
["google" /* Google */]: "Google API key",
|
|
494
|
+
["anthropic" /* Anthropic */]: "Anthropic API key"
|
|
495
|
+
};
|
|
496
|
+
const configKey = {
|
|
497
|
+
["openai" /* OpenAI */]: "openaiApiKey",
|
|
498
|
+
["google" /* Google */]: "googleApiKey",
|
|
499
|
+
["anthropic" /* Anthropic */]: "anthropicApiKey"
|
|
500
|
+
};
|
|
501
|
+
if (config.modelOverride) {
|
|
502
|
+
if (!keyFor[config.modelOverride.provider]) {
|
|
503
|
+
throw new ConfigurationError(
|
|
504
|
+
`${humanName[config.modelOverride.provider]} is required when using modelOverride with provider "${config.modelOverride.provider}". Pass ${configKey[config.modelOverride.provider]} in config.`
|
|
505
|
+
);
|
|
506
|
+
}
|
|
507
|
+
return;
|
|
508
|
+
}
|
|
509
|
+
for (const provider of this.metadata.defaultProviders) {
|
|
510
|
+
if (!keyFor[provider]) {
|
|
511
|
+
throw new ConfigurationError(
|
|
512
|
+
`${humanName[provider]} is required for ${this.metadata.name} evaluator. Pass ${configKey[provider]} in config.`
|
|
513
|
+
);
|
|
514
|
+
}
|
|
515
|
+
}
|
|
516
|
+
}
|
|
344
517
|
/**
|
|
345
518
|
* Normalize telemetry config to standard format
|
|
346
519
|
*/
|
|
@@ -421,6 +594,33 @@ var BaseEvaluator = class {
|
|
|
421
594
|
);
|
|
422
595
|
}
|
|
423
596
|
}
|
|
597
|
+
/**
|
|
598
|
+
* Create an LLM provider, honouring modelOverride if set.
|
|
599
|
+
* When override is active, the key for the override provider is resolved
|
|
600
|
+
* from the matching top-level config field (e.g. anthropicApiKey for Anthropic).
|
|
601
|
+
*/
|
|
602
|
+
createConfiguredProvider(defaultType, defaultModel, defaultApiKey) {
|
|
603
|
+
const override = this.config.modelOverride;
|
|
604
|
+
if (override) {
|
|
605
|
+
const apiKeyFor = {
|
|
606
|
+
["openai" /* OpenAI */]: this.config.openaiApiKey,
|
|
607
|
+
["google" /* Google */]: this.config.googleApiKey,
|
|
608
|
+
["anthropic" /* Anthropic */]: this.config.anthropicApiKey
|
|
609
|
+
};
|
|
610
|
+
return createProvider({
|
|
611
|
+
type: override.provider,
|
|
612
|
+
model: override.model,
|
|
613
|
+
apiKey: apiKeyFor[override.provider],
|
|
614
|
+
maxRetries: this.config.maxRetries
|
|
615
|
+
});
|
|
616
|
+
}
|
|
617
|
+
return createProvider({
|
|
618
|
+
type: defaultType,
|
|
619
|
+
model: defaultModel,
|
|
620
|
+
apiKey: defaultApiKey,
|
|
621
|
+
maxRetries: this.config.maxRetries
|
|
622
|
+
});
|
|
623
|
+
}
|
|
424
624
|
/**
|
|
425
625
|
* Send telemetry event to analytics service
|
|
426
626
|
* Common helper for all evaluators
|
|
@@ -441,123 +641,12 @@ var BaseEvaluator = class {
|
|
|
441
641
|
provider: params.provider,
|
|
442
642
|
token_usage: params.tokenUsage,
|
|
443
643
|
metadata: params.metadata,
|
|
644
|
+
model_override: this.config.modelOverride ? true : void 0,
|
|
444
645
|
// Include input text only if recording is enabled
|
|
445
646
|
input_text: this.config.telemetry.recordInputs ? params.inputText : void 0
|
|
446
647
|
});
|
|
447
648
|
}
|
|
448
649
|
};
|
|
449
|
-
var DEFAULT_MODELS = {
|
|
450
|
-
openai: "gpt-4o",
|
|
451
|
-
anthropic: "claude-sonnet-4-5-20250929",
|
|
452
|
-
google: "gemini-2.5-pro"
|
|
453
|
-
};
|
|
454
|
-
var VercelAIProvider = class {
|
|
455
|
-
constructor(config) {
|
|
456
|
-
this.config = config;
|
|
457
|
-
if (config.type === "custom") {
|
|
458
|
-
throw new Error(
|
|
459
|
-
"VercelAIProvider does not support custom type. Use config.customProvider directly."
|
|
460
|
-
);
|
|
461
|
-
}
|
|
462
|
-
}
|
|
463
|
-
/**
|
|
464
|
-
* Generate structured output using Vercel AI SDK's generateText with output
|
|
465
|
-
*/
|
|
466
|
-
async generateStructured(request) {
|
|
467
|
-
const model = await this.getModel(request.model);
|
|
468
|
-
const startTime = Date.now();
|
|
469
|
-
const { output, usage } = await generateText({
|
|
470
|
-
model,
|
|
471
|
-
messages: request.messages,
|
|
472
|
-
output: Output.object({ schema: request.schema }),
|
|
473
|
-
temperature: request.temperature ?? 0,
|
|
474
|
-
maxRetries: this.config.maxRetries ?? 0,
|
|
475
|
-
...request.maxTokens !== void 0 ? { maxTokens: request.maxTokens } : {}
|
|
476
|
-
});
|
|
477
|
-
return {
|
|
478
|
-
data: output,
|
|
479
|
-
model: request.model || this.getDefaultModel(),
|
|
480
|
-
usage: {
|
|
481
|
-
inputTokens: usage.inputTokens || 0,
|
|
482
|
-
outputTokens: usage.outputTokens || 0
|
|
483
|
-
},
|
|
484
|
-
latencyMs: Date.now() - startTime
|
|
485
|
-
};
|
|
486
|
-
}
|
|
487
|
-
/**
|
|
488
|
-
* Generate plain text using Vercel AI SDK's generateText
|
|
489
|
-
*/
|
|
490
|
-
async generateText(messages, temperature) {
|
|
491
|
-
const model = await this.getModel();
|
|
492
|
-
const startTime = Date.now();
|
|
493
|
-
const { text, usage } = await generateText({
|
|
494
|
-
model,
|
|
495
|
-
messages,
|
|
496
|
-
temperature: temperature ?? this.config.temperature ?? 0,
|
|
497
|
-
maxRetries: this.config.maxRetries ?? 0
|
|
498
|
-
});
|
|
499
|
-
return {
|
|
500
|
-
text,
|
|
501
|
-
usage: {
|
|
502
|
-
inputTokens: usage.inputTokens || 0,
|
|
503
|
-
outputTokens: usage.outputTokens || 0
|
|
504
|
-
},
|
|
505
|
-
latencyMs: Date.now() - startTime
|
|
506
|
-
};
|
|
507
|
-
}
|
|
508
|
-
/**
|
|
509
|
-
* Get the configured language model.
|
|
510
|
-
* Uses dynamic imports so consumers only need to install the provider packages they use.
|
|
511
|
-
*/
|
|
512
|
-
async getModel(requestModel) {
|
|
513
|
-
const modelId = requestModel || this.config.model || this.getDefaultModel();
|
|
514
|
-
const apiKey = this.config.apiKey;
|
|
515
|
-
switch (this.config.type) {
|
|
516
|
-
case "openai": {
|
|
517
|
-
const { createOpenAI } = await import('@ai-sdk/openai').catch(() => {
|
|
518
|
-
throw new Error(
|
|
519
|
-
"To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai"
|
|
520
|
-
);
|
|
521
|
-
});
|
|
522
|
-
return createOpenAI(apiKey ? { apiKey } : {})(modelId);
|
|
523
|
-
}
|
|
524
|
-
case "anthropic": {
|
|
525
|
-
const { createAnthropic } = await import('@ai-sdk/anthropic').catch(() => {
|
|
526
|
-
throw new Error(
|
|
527
|
-
"To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic"
|
|
528
|
-
);
|
|
529
|
-
});
|
|
530
|
-
return createAnthropic(apiKey ? { apiKey } : {})(modelId);
|
|
531
|
-
}
|
|
532
|
-
case "google": {
|
|
533
|
-
const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch(() => {
|
|
534
|
-
throw new Error(
|
|
535
|
-
"To use the Google provider, install its adapter: npm install @ai-sdk/google"
|
|
536
|
-
);
|
|
537
|
-
});
|
|
538
|
-
return createGoogleGenerativeAI(apiKey ? { apiKey } : {})(modelId);
|
|
539
|
-
}
|
|
540
|
-
default:
|
|
541
|
-
throw new Error(`Unsupported provider type: ${this.config.type}`);
|
|
542
|
-
}
|
|
543
|
-
}
|
|
544
|
-
/**
|
|
545
|
-
* Get default model for the configured provider
|
|
546
|
-
*/
|
|
547
|
-
getDefaultModel() {
|
|
548
|
-
const providerType = this.config.type;
|
|
549
|
-
if (providerType === "custom") {
|
|
550
|
-
throw new Error("Cannot get default model for custom provider type");
|
|
551
|
-
}
|
|
552
|
-
return DEFAULT_MODELS[providerType];
|
|
553
|
-
}
|
|
554
|
-
};
|
|
555
|
-
function createProvider(config) {
|
|
556
|
-
if (config.type === "custom" && config.customProvider) {
|
|
557
|
-
return config.customProvider;
|
|
558
|
-
}
|
|
559
|
-
return new VercelAIProvider(config);
|
|
560
|
-
}
|
|
561
650
|
var TextComplexityLevel = z.enum([
|
|
562
651
|
"Slightly complex",
|
|
563
652
|
"Moderately complex",
|
|
@@ -759,6 +848,44 @@ function featuresToJSON(features, decimals = 1, castToInt = true) {
|
|
|
759
848
|
}
|
|
760
849
|
return JSON.stringify(payload, null, 2);
|
|
761
850
|
}
|
|
851
|
+
/**
 * Registry of libraries a preprocessing step may call, keyed by library name.
 * Each adapter exposes `call(fnName, text)`, which invokes the named library function
 * on `text` and returns its result.
 */
var LIBRARY_ADAPTERS = {
  "text-readability": {
    /**
     * @param fnName - Name of a function on the text-readability export
     * @param text - Text passed as the function's only argument
     * @throws {Error} If `fnName` does not resolve to a function
     */
    call(fnName, text) {
      const candidate = textReadability[fnName];
      if (typeof candidate === "function") {
        // Invoke with the library object as `this`, in case the function relies on it.
        return candidate.call(textReadability, text);
      }
      throw new Error(`Function "${fnName}" not found in text-readability.`);
    }
  }
};
|
|
862
|
+
/**
 * Registry of numeric post-processing transforms, keyed by transform type.
 * Each transform receives the raw value and the step's `post_transform` options object.
 */
var POST_TRANSFORMS = {
  /**
   * Round `value` to `precision` decimal places using Math.round.
   *
   * @param value - Number to round
   * @param options - Optional; `precision` defaults to 0 (round to an integer)
   * @returns The rounded number
   */
  // `= {}` lets round(value) work without an options object instead of throwing on destructuring.
  round(value, { precision = 0 } = {}) {
    const factor = 10 ** precision;
    return Math.round(value * factor) / factor;
  }
};
|
|
868
|
+
/**
 * Run one preprocessing step: call `impl.function` from `impl.library` on `text`, then
 * apply the optional `impl.post_transform` to the result.
 *
 * @param text - Input text handed to the library function
 * @param impl - Step description; reads `library`, `function`, and optional `post_transform`
 *   (whose `type` selects the transform and which is itself passed as the transform options)
 * @returns The library result, post-transformed when requested
 * @throws {Error} If the library or the post_transform type is not registered
 */
function runPreprocessingStep(text, impl) {
  // Own-property lookups only: plain bracket access would also resolve inherited
  // Object.prototype members ("toString", "constructor", ...), letting an unknown name
  // slip past the "unsupported" checks and produce a garbage result.
  const ownEntry = (registry, key) =>
    Object.prototype.hasOwnProperty.call(registry, key) ? registry[key] : void 0;

  const adapter = ownEntry(LIBRARY_ADAPTERS, impl.library);
  if (!adapter) {
    const supported = Object.keys(LIBRARY_ADAPTERS).join(", ");
    throw new Error(
      `Unsupported preprocessing library "${impl.library}". Supported: ${supported}.`
    );
  }
  let result = adapter.call(impl.function, text);
  if (impl.post_transform) {
    const transform = ownEntry(POST_TRANSFORMS, impl.post_transform.type);
    if (!transform) {
      const supported = Object.keys(POST_TRANSFORMS).join(", ");
      throw new Error(
        `Unsupported post_transform type "${impl.post_transform.type}". Supported: ${supported}.`
      );
    }
    result = transform(result, impl.post_transform);
  }
  return result;
}
|
|
762
889
|
|
|
763
890
|
// ../../evals/prompts/vocabulary/background-knowledge.txt
|
|
764
891
|
var background_knowledge_default = `
|
|
@@ -1064,32 +1191,28 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1064
1191
|
name: "Vocabulary",
|
|
1065
1192
|
description: "Evaluates vocabulary complexity of educational texts relative to grade level",
|
|
1066
1193
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
1067
|
-
|
|
1068
|
-
requiresOpenAIKey: true
|
|
1194
|
+
defaultProviders: ["google" /* Google */, "openai" /* OpenAI */]
|
|
1069
1195
|
};
|
|
1070
1196
|
grades34ComplexityProvider;
|
|
1071
1197
|
otherGradesComplexityProvider;
|
|
1072
1198
|
backgroundKnowledgeProvider;
|
|
1073
1199
|
constructor(config) {
|
|
1074
1200
|
super(config);
|
|
1075
|
-
this.grades34ComplexityProvider =
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
apiKey: config.openaiApiKey,
|
|
1091
|
-
maxRetries: this.config.maxRetries
|
|
1092
|
-
});
|
|
1201
|
+
this.grades34ComplexityProvider = this.createConfiguredProvider(
|
|
1202
|
+
"google" /* Google */,
|
|
1203
|
+
"gemini-2.5-pro",
|
|
1204
|
+
config.googleApiKey
|
|
1205
|
+
);
|
|
1206
|
+
this.otherGradesComplexityProvider = this.createConfiguredProvider(
|
|
1207
|
+
"openai" /* OpenAI */,
|
|
1208
|
+
"gpt-4.1-2025-04-14",
|
|
1209
|
+
config.openaiApiKey
|
|
1210
|
+
);
|
|
1211
|
+
this.backgroundKnowledgeProvider = this.createConfiguredProvider(
|
|
1212
|
+
"openai" /* OpenAI */,
|
|
1213
|
+
"gpt-4o-2024-11-20",
|
|
1214
|
+
config.openaiApiKey
|
|
1215
|
+
);
|
|
1093
1216
|
}
|
|
1094
1217
|
/**
|
|
1095
1218
|
* Evaluate vocabulary complexity for a given text and grade level
|
|
@@ -1098,6 +1221,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1098
1221
|
* @param grade - The target grade level (3-12)
|
|
1099
1222
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
1100
1223
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
1224
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1101
1225
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1102
1226
|
*/
|
|
1103
1227
|
async evaluate(text, grade) {
|
|
@@ -1109,7 +1233,9 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1109
1233
|
});
|
|
1110
1234
|
const startTime = Date.now();
|
|
1111
1235
|
const stageDetails = [];
|
|
1112
|
-
const
|
|
1236
|
+
const complexityProviderLabel = grade === "3" || grade === "4" ? this.grades34ComplexityProvider.label : this.otherGradesComplexityProvider.label;
|
|
1237
|
+
const backgroundProviderLabel = this.backgroundKnowledgeProvider.label;
|
|
1238
|
+
const modelLabel = this.config.modelOverride ? backgroundProviderLabel : `${backgroundProviderLabel}+${complexityProviderLabel}`;
|
|
1113
1239
|
try {
|
|
1114
1240
|
this.validateText(text);
|
|
1115
1241
|
this.validateGrade(grade, new Set(_VocabularyEvaluator.metadata.supportedGrades));
|
|
@@ -1120,7 +1246,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1120
1246
|
const bgResponse = await this.getBackgroundKnowledgeAssumption(text, grade);
|
|
1121
1247
|
stageDetails.push({
|
|
1122
1248
|
stage: "background_knowledge",
|
|
1123
|
-
provider:
|
|
1249
|
+
provider: backgroundProviderLabel,
|
|
1124
1250
|
latency_ms: bgResponse.latencyMs,
|
|
1125
1251
|
token_usage: {
|
|
1126
1252
|
input_tokens: bgResponse.usage.inputTokens,
|
|
@@ -1136,7 +1262,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1136
1262
|
);
|
|
1137
1263
|
stageDetails.push({
|
|
1138
1264
|
stage: "complexity_evaluation",
|
|
1139
|
-
provider:
|
|
1265
|
+
provider: complexityProviderLabel,
|
|
1140
1266
|
latency_ms: complexityResponse.latencyMs,
|
|
1141
1267
|
token_usage: {
|
|
1142
1268
|
input_tokens: complexityResponse.usage.inputTokens,
|
|
@@ -1152,8 +1278,10 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1152
1278
|
score: complexityResponse.data.complexity_score,
|
|
1153
1279
|
reasoning: complexityResponse.data.reasoning,
|
|
1154
1280
|
metadata: {
|
|
1155
|
-
model:
|
|
1156
|
-
processingTimeMs: latencyMs
|
|
1281
|
+
model: modelLabel,
|
|
1282
|
+
processingTimeMs: latencyMs,
|
|
1283
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
1284
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
1157
1285
|
},
|
|
1158
1286
|
_internal: complexityResponse.data
|
|
1159
1287
|
};
|
|
@@ -1162,7 +1290,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1162
1290
|
latencyMs,
|
|
1163
1291
|
textLength: text.length,
|
|
1164
1292
|
grade,
|
|
1165
|
-
provider:
|
|
1293
|
+
provider: modelLabel,
|
|
1166
1294
|
tokenUsage: totalTokenUsage,
|
|
1167
1295
|
metadata: {
|
|
1168
1296
|
stage_details: stageDetails
|
|
@@ -1197,7 +1325,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1197
1325
|
latencyMs,
|
|
1198
1326
|
textLength: text.length,
|
|
1199
1327
|
grade,
|
|
1200
|
-
provider:
|
|
1328
|
+
provider: modelLabel,
|
|
1201
1329
|
tokenUsage: totalTokenUsage,
|
|
1202
1330
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1203
1331
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -1415,25 +1543,12 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1415
1543
|
name: "Sentence Structure",
|
|
1416
1544
|
description: "Evaluates sentence structure complexity based on grammatical features",
|
|
1417
1545
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
1418
|
-
|
|
1419
|
-
requiresOpenAIKey: true
|
|
1546
|
+
defaultProviders: ["openai" /* OpenAI */]
|
|
1420
1547
|
};
|
|
1421
|
-
|
|
1422
|
-
complexityProvider;
|
|
1548
|
+
provider;
|
|
1423
1549
|
constructor(config) {
|
|
1424
1550
|
super(config);
|
|
1425
|
-
this.
|
|
1426
|
-
type: "openai",
|
|
1427
|
-
model: "gpt-4o",
|
|
1428
|
-
apiKey: config.openaiApiKey,
|
|
1429
|
-
maxRetries: this.config.maxRetries
|
|
1430
|
-
});
|
|
1431
|
-
this.complexityProvider = createProvider({
|
|
1432
|
-
type: "openai",
|
|
1433
|
-
model: "gpt-4o",
|
|
1434
|
-
apiKey: config.openaiApiKey,
|
|
1435
|
-
maxRetries: this.config.maxRetries
|
|
1436
|
-
});
|
|
1551
|
+
this.provider = this.createConfiguredProvider("openai" /* OpenAI */, "gpt-4o", config.openaiApiKey);
|
|
1437
1552
|
}
|
|
1438
1553
|
/**
|
|
1439
1554
|
* Evaluate sentence structure complexity for a given text and grade level
|
|
@@ -1442,6 +1557,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1442
1557
|
* @param grade - The target grade level (3-12)
|
|
1443
1558
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
1444
1559
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
1560
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1445
1561
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1446
1562
|
*/
|
|
1447
1563
|
async evaluate(text, grade) {
|
|
@@ -1463,7 +1579,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1463
1579
|
const analysisResponse = await this.analyzeSentenceStructure(text);
|
|
1464
1580
|
stageDetails.push({
|
|
1465
1581
|
stage: "sentence_analysis",
|
|
1466
|
-
provider:
|
|
1582
|
+
provider: this.provider.label,
|
|
1467
1583
|
latency_ms: analysisResponse.latencyMs,
|
|
1468
1584
|
token_usage: {
|
|
1469
1585
|
input_tokens: analysisResponse.usage.inputTokens,
|
|
@@ -1478,7 +1594,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1478
1594
|
const complexityResponse = await this.classifyComplexity(features, grade, text);
|
|
1479
1595
|
stageDetails.push({
|
|
1480
1596
|
stage: "complexity_classification",
|
|
1481
|
-
provider:
|
|
1597
|
+
provider: this.provider.label,
|
|
1482
1598
|
latency_ms: complexityResponse.latencyMs,
|
|
1483
1599
|
token_usage: {
|
|
1484
1600
|
input_tokens: complexityResponse.usage.inputTokens,
|
|
@@ -1494,8 +1610,10 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1494
1610
|
score: complexityResponse.data.answer,
|
|
1495
1611
|
reasoning: complexityResponse.data.reasoning,
|
|
1496
1612
|
metadata: {
|
|
1497
|
-
model:
|
|
1498
|
-
processingTimeMs: latencyMs
|
|
1613
|
+
model: this.provider.label,
|
|
1614
|
+
processingTimeMs: latencyMs,
|
|
1615
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
1616
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
1499
1617
|
},
|
|
1500
1618
|
_internal: {
|
|
1501
1619
|
sentenceAnalysis: analysisResponse.data,
|
|
@@ -1508,7 +1626,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1508
1626
|
latencyMs,
|
|
1509
1627
|
textLength: text.length,
|
|
1510
1628
|
grade,
|
|
1511
|
-
provider:
|
|
1629
|
+
provider: this.provider.label,
|
|
1512
1630
|
tokenUsage: totalTokenUsage,
|
|
1513
1631
|
metadata: {
|
|
1514
1632
|
stage_details: stageDetails
|
|
@@ -1543,7 +1661,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1543
1661
|
latencyMs,
|
|
1544
1662
|
textLength: text.length,
|
|
1545
1663
|
grade,
|
|
1546
|
-
provider:
|
|
1664
|
+
provider: this.provider.label,
|
|
1547
1665
|
tokenUsage: totalTokenUsage,
|
|
1548
1666
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1549
1667
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -1571,7 +1689,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1571
1689
|
`flesch_kincaid_grade: ${metrics.fleschKincaidGrade}`
|
|
1572
1690
|
].join("\n");
|
|
1573
1691
|
const userPrompt = getUserPromptAnalysis(text, gtCountsStr);
|
|
1574
|
-
const response = await this.
|
|
1692
|
+
const response = await this.provider.generateStructured({
|
|
1575
1693
|
messages: [
|
|
1576
1694
|
{ role: "system", content: getSystemPromptAnalysis() },
|
|
1577
1695
|
{ role: "user", content: userPrompt }
|
|
@@ -1593,7 +1711,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1593
1711
|
async classifyComplexity(features, grade, excerpt) {
|
|
1594
1712
|
const featuresJSON = featuresToJSON(features, 1, true);
|
|
1595
1713
|
const userPrompt = getUserPromptComplexity(featuresJSON, grade, excerpt);
|
|
1596
|
-
const response = await this.
|
|
1714
|
+
const response = await this.provider.generateStructured({
|
|
1597
1715
|
messages: [
|
|
1598
1716
|
{ role: "system", content: getSystemPromptComplexity() },
|
|
1599
1717
|
{ role: "user", content: userPrompt }
|
|
@@ -1649,18 +1767,16 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1649
1767
|
description: "Determines appropriate grade level for text with scaffolding recommendations",
|
|
1650
1768
|
supportedGrades: [],
|
|
1651
1769
|
// No grade parameter required - evaluates what grade the text is appropriate for
|
|
1652
|
-
|
|
1653
|
-
requiresOpenAIKey: false
|
|
1770
|
+
defaultProviders: ["google" /* Google */]
|
|
1654
1771
|
};
|
|
1655
1772
|
provider;
|
|
1656
1773
|
constructor(config) {
|
|
1657
1774
|
super(config);
|
|
1658
|
-
this.provider =
|
|
1659
|
-
|
|
1660
|
-
|
|
1661
|
-
|
|
1662
|
-
|
|
1663
|
-
});
|
|
1775
|
+
this.provider = this.createConfiguredProvider(
|
|
1776
|
+
"google" /* Google */,
|
|
1777
|
+
"gemini-2.5-pro",
|
|
1778
|
+
config.googleApiKey
|
|
1779
|
+
);
|
|
1664
1780
|
}
|
|
1665
1781
|
/**
|
|
1666
1782
|
* Evaluate grade level appropriateness for a given text
|
|
@@ -1668,6 +1784,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1668
1784
|
* @param text - The text to evaluate
|
|
1669
1785
|
* @returns Evaluation result with grade recommendations and scaffolding suggestions
|
|
1670
1786
|
* @throws {ValidationError} If text is empty or too short/long
|
|
1787
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1671
1788
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1672
1789
|
*/
|
|
1673
1790
|
async evaluate(text) {
|
|
@@ -1701,8 +1818,10 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1701
1818
|
score: response.data.grade,
|
|
1702
1819
|
reasoning: response.data.reasoning,
|
|
1703
1820
|
metadata: {
|
|
1704
|
-
model:
|
|
1705
|
-
processingTimeMs: latencyMs
|
|
1821
|
+
model: this.provider.label,
|
|
1822
|
+
processingTimeMs: latencyMs,
|
|
1823
|
+
inputTokens: tokenUsage.input_tokens,
|
|
1824
|
+
outputTokens: tokenUsage.output_tokens
|
|
1706
1825
|
},
|
|
1707
1826
|
_internal: response.data
|
|
1708
1827
|
};
|
|
@@ -1710,7 +1829,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1710
1829
|
status: "success",
|
|
1711
1830
|
latencyMs,
|
|
1712
1831
|
textLength: text.length,
|
|
1713
|
-
provider:
|
|
1832
|
+
provider: this.provider.label,
|
|
1714
1833
|
tokenUsage,
|
|
1715
1834
|
// No metadata.stage_details for single-stage evaluator
|
|
1716
1835
|
inputText: text
|
|
@@ -1735,7 +1854,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1735
1854
|
status: "error",
|
|
1736
1855
|
latencyMs,
|
|
1737
1856
|
textLength: text.length,
|
|
1738
|
-
provider:
|
|
1857
|
+
provider: this.provider.label,
|
|
1739
1858
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1740
1859
|
inputText: text
|
|
1741
1860
|
}).catch(() => {
|
|
@@ -1846,18 +1965,16 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1846
1965
|
name: "Subject Matter Knowledge",
|
|
1847
1966
|
description: "Evaluates background knowledge demands of educational texts relative to grade level",
|
|
1848
1967
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
1849
|
-
|
|
1850
|
-
requiresOpenAIKey: false
|
|
1968
|
+
defaultProviders: ["google" /* Google */]
|
|
1851
1969
|
};
|
|
1852
1970
|
provider;
|
|
1853
1971
|
constructor(config) {
|
|
1854
1972
|
super(config);
|
|
1855
|
-
this.provider =
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
|
|
1859
|
-
|
|
1860
|
-
});
|
|
1973
|
+
this.provider = this.createConfiguredProvider(
|
|
1974
|
+
"google" /* Google */,
|
|
1975
|
+
"gemini-3-flash-preview",
|
|
1976
|
+
config.googleApiKey
|
|
1977
|
+
);
|
|
1861
1978
|
}
|
|
1862
1979
|
/**
|
|
1863
1980
|
* Evaluate subject matter knowledge complexity for a given text and grade level
|
|
@@ -1866,6 +1983,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1866
1983
|
* @param grade - The target grade level (3-12)
|
|
1867
1984
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
1868
1985
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
1986
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1869
1987
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1870
1988
|
*/
|
|
1871
1989
|
async evaluate(text, grade) {
|
|
@@ -1888,7 +2006,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1888
2006
|
const response = await this.evaluateSmk(text, grade, fkScore);
|
|
1889
2007
|
stageDetails.push({
|
|
1890
2008
|
stage: "smk_evaluation",
|
|
1891
|
-
provider:
|
|
2009
|
+
provider: this.provider.label,
|
|
1892
2010
|
latency_ms: response.latencyMs,
|
|
1893
2011
|
token_usage: {
|
|
1894
2012
|
input_tokens: response.usage.inputTokens,
|
|
@@ -1904,8 +2022,10 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1904
2022
|
score: response.data.complexity_score,
|
|
1905
2023
|
reasoning: response.data.reasoning,
|
|
1906
2024
|
metadata: {
|
|
1907
|
-
model:
|
|
1908
|
-
processingTimeMs: latencyMs
|
|
2025
|
+
model: this.provider.label,
|
|
2026
|
+
processingTimeMs: latencyMs,
|
|
2027
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
2028
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
1909
2029
|
},
|
|
1910
2030
|
_internal: response.data
|
|
1911
2031
|
};
|
|
@@ -1914,7 +2034,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1914
2034
|
latencyMs,
|
|
1915
2035
|
textLength: text.length,
|
|
1916
2036
|
grade,
|
|
1917
|
-
provider:
|
|
2037
|
+
provider: this.provider.label,
|
|
1918
2038
|
tokenUsage: totalTokenUsage,
|
|
1919
2039
|
metadata: {
|
|
1920
2040
|
stage_details: stageDetails
|
|
@@ -1949,7 +2069,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1949
2069
|
latencyMs,
|
|
1950
2070
|
textLength: text.length,
|
|
1951
2071
|
grade,
|
|
1952
|
-
provider:
|
|
2072
|
+
provider: this.provider.label,
|
|
1953
2073
|
tokenUsage: totalTokenUsage,
|
|
1954
2074
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1955
2075
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -2053,18 +2173,16 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2053
2173
|
name: "Conventionality",
|
|
2054
2174
|
description: "Evaluates how explicit, literal, and straightforward a text's meaning is relative to grade level",
|
|
2055
2175
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
2056
|
-
|
|
2057
|
-
requiresOpenAIKey: false
|
|
2176
|
+
defaultProviders: ["google" /* Google */]
|
|
2058
2177
|
};
|
|
2059
2178
|
provider;
|
|
2060
2179
|
constructor(config) {
|
|
2061
2180
|
super(config);
|
|
2062
|
-
this.provider =
|
|
2063
|
-
|
|
2064
|
-
|
|
2065
|
-
|
|
2066
|
-
|
|
2067
|
-
});
|
|
2181
|
+
this.provider = this.createConfiguredProvider(
|
|
2182
|
+
"google" /* Google */,
|
|
2183
|
+
"gemini-3-flash-preview",
|
|
2184
|
+
config.googleApiKey
|
|
2185
|
+
);
|
|
2068
2186
|
}
|
|
2069
2187
|
/**
|
|
2070
2188
|
* Evaluate conventionality complexity for a given text and grade level
|
|
@@ -2073,6 +2191,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2073
2191
|
* @param grade - The target grade level (3-12)
|
|
2074
2192
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
2075
2193
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
2194
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
2076
2195
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
2077
2196
|
*/
|
|
2078
2197
|
async evaluate(text, grade) {
|
|
@@ -2095,7 +2214,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2095
2214
|
const response = await this.evaluateConventionality(text, grade, fkScore);
|
|
2096
2215
|
stageDetails.push({
|
|
2097
2216
|
stage: "conventionality_evaluation",
|
|
2098
|
-
provider:
|
|
2217
|
+
provider: this.provider.label,
|
|
2099
2218
|
latency_ms: response.latencyMs,
|
|
2100
2219
|
token_usage: {
|
|
2101
2220
|
input_tokens: response.usage.inputTokens,
|
|
@@ -2111,8 +2230,10 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2111
2230
|
score: response.data.complexity_score,
|
|
2112
2231
|
reasoning: response.data.reasoning,
|
|
2113
2232
|
metadata: {
|
|
2114
|
-
model:
|
|
2115
|
-
processingTimeMs: latencyMs
|
|
2233
|
+
model: this.provider.label,
|
|
2234
|
+
processingTimeMs: latencyMs,
|
|
2235
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
2236
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
2116
2237
|
},
|
|
2117
2238
|
_internal: response.data
|
|
2118
2239
|
};
|
|
@@ -2121,7 +2242,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2121
2242
|
latencyMs,
|
|
2122
2243
|
textLength: text.length,
|
|
2123
2244
|
grade,
|
|
2124
|
-
provider:
|
|
2245
|
+
provider: this.provider.label,
|
|
2125
2246
|
tokenUsage: totalTokenUsage,
|
|
2126
2247
|
metadata: {
|
|
2127
2248
|
stage_details: stageDetails
|
|
@@ -2156,7 +2277,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2156
2277
|
latencyMs,
|
|
2157
2278
|
textLength: text.length,
|
|
2158
2279
|
grade,
|
|
2159
|
-
provider:
|
|
2280
|
+
provider: this.provider.label,
|
|
2160
2281
|
tokenUsage: totalTokenUsage,
|
|
2161
2282
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
2162
2283
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -2188,6 +2309,278 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2188
2309
|
};
|
|
2189
2310
|
}
|
|
2190
2311
|
};
|
|
2312
|
+
// Structured-output schema for the Purpose evaluator's LLM response.
// Each object level is declared with .strict(); the .describe() strings are
// part of the schema and are reproduced verbatim.
var PurposeOutputSchema = (() => {
  const complexityLevels = [
    "slightly_complex",
    "moderately_complex",
    "very_complex",
    "exceedingly_complex",
    "more_context_needed"
  ];

  // One entry of details.detailed_summary.
  const summaryEntry = z.object({
    factor: z.string().describe("The specific text complexity factor identified."),
    description: z.string().describe("How this factor manifests in the text."),
    effect_on_complexity_dimension: z.string().describe("How this factor affects the reader's ability to understand the text's specific complexity dimension.")
  }).strict();

  // One entry of details.adjustment_and_scaffolding.
  const scaffoldingEntry = z.object({
    scaffolding_need: z.string().describe("The complexity factor that requires scaffolding."),
    suggestion: z.string().describe("A specific instructional strategy to support students with this factor.")
  }).strict();

  // One entry of details.recommended_use_cases.
  const useCaseEntry = z.object({
    opportunity: z.string().describe("An instructional opportunity related to the text."),
    suggestion: z.string().describe("A specific way to leverage this text for that instructional purpose.")
  }).strict();

  const details = z.object({
    detailed_summary: z.array(summaryEntry).describe("Individual complexity factors with descriptions and their effects."),
    adjustment_and_scaffolding: z.array(scaffoldingEntry).describe("Scaffolding strategies to make the text accessible at the target grade."),
    recommended_use_cases: z.array(useCaseEntry).describe("Additional instructional opportunities for using this text.")
  }).strict().describe("Practical instructional details including scaffolding strategies and recommended use cases.");

  return z.object({
    complexity_score: z.enum(complexityLevels).describe("The Purpose complexity level for the target grade."),
    reasoning: z.string().describe("A high-level summary of why the text is at this complexity level for the target grade."),
    details
  }).strict();
})();
|
|
2313
|
+
|
|
2314
|
+
// ../../evals/prompts/purpose/system.txt
|
|
2315
|
+
var system_default4 = '\n Role\n You are an expert reading assessment evaluator. Your task is to determine the Text Complexity of a given passage based exclusively on the Purpose dimension of the qualitative measures rubric.\n\n Task Details\n You will be provided with an informational or literary `text`, along with its `grade_level` and `fk_score` (Flesch-Kincaid). You must analyze the text and determine how difficult it is for a reader to identify the author\'s purpose. \n\n Crucially, you must distinguish between the text\'s *topic* (what it is about) and its *purpose* (why the author wrote it). \n\n Rubric: Purpose Complexity\n Exceedingly Complex: Subtle and intricate, difficult to determine; includes many theoretical or abstract elements.\n Very Complex: Implicit or subtle but fairly easy to infer; more theoretical or abstract than concrete.\n Moderately Complex: Implied but easy to identify based upon context or source.\n Slightly Complex: Explicitly stated, clear, concrete, narrowly focused.\n More Context Needed: The text is a fragment or lacks necessary introductory context, making the true purpose impossible to determine accurately without external background knowledge.\n\n Expert Rules for Evaluating Purpose\n Based on expert consensus and historical grading corrections, you must apply the following heuristics:\n\n 1. The "Slightly Complex" Benchmark (Straightforward and Explicit)\n A text is Slightly Complex if its purpose is explicitly stated or if its informative intent is straightforward, clear, concrete, and directly answers what the text is immediately about. If the text opens by clearly identifying a concrete topic (e.g., "Pins are made of either brass or iron wire") and rigidly follows through by explaining factual, practical information or a process (like manufacturing steps or geographic facts), the purpose is considered explicit and straightforward. It does *not* require a literal statement like "The purpose of this text is to..." 
as long as the delivery of information is direct, clear, and unadorned by persuasive elements or complex framing.\n\n 2. Moderately Complex via Guiding Questions & Inquiry Formats\n If a text begins with a general introduction and uses guiding questions (e.g., "Have you ever wondered how clouds are formed?") to transition into an explanation, the purpose is implied rather than explicitly stated upfront. Because the reader must recognize the question as the pivot point for the author\'s intent, it is Moderately Complex.\n\n 3. Moderately Complex via Multiple Distinct Informational Goals\n If a text covers a broad topic but jumps between several distinct scientific or informational objectives without an overarching framing device or explicit thesis (e.g., talking about measuring ice sheets, then mapping, then finding meteorites), the reader must synthesize these diverse facts to recognize the broader purpose, making it Moderately Complex.\n\n 4. Moderately Complex via Arguments Disguised as Information\n If an author is arguing a specific point, correcting a misconception, or defending a stance, but the text could initially be mistaken by students as purely informative factual text, it is Moderately Complex. The reader must infer the persuasive intent or argumentative purpose beneath the informative tone.\n\n 5. "More Context Needed" for Fragments\n If a text is a fragment missing a crucial introduction or context, and identifying the author\'s purpose beyond a simple surface-level description would be exceptionally difficult for a reader in the target grade level without that external background, score it as `more_context_needed`. 
\n\n Output Format\n Provide your evaluation in the following structure:\n reasoning:\n - Surface Analysis: Identify if the text clearly identifies its topic and delivers straightforward facts, or if it utilizes structural cues, titles, or direct thesis statements.\n - Subtlety & Framing: Is the informative purpose straightforward and concrete? Does it use guiding questions? Is it an argument disguised as pure information? Are there multiple distinct informational goals requiring synthesis?\n - Context Check: Is this text a fragment missing crucial context that obscures the deeper purpose for the target grade level?\n - Rubric Alignment: Explain how the text aligns with the specific language of the rubric, explicitly referencing the expert rules above. Justify why it isn\'t one level simpler or more complex.\n\n answer:\n - complexity_score: (slightly_complex, moderately_complex, very_complex, exceedingly_complex, more_context_needed)\n - reasoning: A brief summary of your final decision.\n - details: Structured breakdown of PurposeDetails including detailed_summary, adjustment_and_scaffolding, and recommended_use_cases.\n';
|
|
2316
|
+
|
|
2317
|
+
// ../../evals/prompts/purpose/user.txt
|
|
2318
|
+
var user_default4 = "Analyze:\nText: {text}\nGrade: {grade_level}\nFK Score: {fk_score}";
|
|
2319
|
+
|
|
2320
|
+
// ../../evals/prompts/purpose/config.json
|
|
2321
|
+
var config_default = {
|
|
2322
|
+
evaluator: {
|
|
2323
|
+
id: "literacy.gla.purpose",
|
|
2324
|
+
name: "Purpose Dimension Text Complexity Evaluator",
|
|
2325
|
+
description: "Evaluates the Purpose dimension of qualitative text complexity for K-12 reading assessment, producing a 5-level rubric rating with structured pedagogical detail."
|
|
2326
|
+
},
|
|
2327
|
+
preprocessing: [
|
|
2328
|
+
{
|
|
2329
|
+
id: "fk_score",
|
|
2330
|
+
kind: "flesch_kincaid_grade",
|
|
2331
|
+
description: "Compute the Flesch-Kincaid Grade Level for the input text and bind it to {fk_score} in the prompt.",
|
|
2332
|
+
input: "text",
|
|
2333
|
+
output: "fk_score",
|
|
2334
|
+
implementation: {
|
|
2335
|
+
python: {
|
|
2336
|
+
library: "textstat",
|
|
2337
|
+
function: "flesch_kincaid_grade",
|
|
2338
|
+
post_transform: {
|
|
2339
|
+
type: "round",
|
|
2340
|
+
precision: 2
|
|
2341
|
+
}
|
|
2342
|
+
},
|
|
2343
|
+
typescript: {
|
|
2344
|
+
library: "text-readability",
|
|
2345
|
+
function: "fleschKincaidGrade",
|
|
2346
|
+
post_transform: {
|
|
2347
|
+
type: "round",
|
|
2348
|
+
precision: 2
|
|
2349
|
+
}
|
|
2350
|
+
}
|
|
2351
|
+
}
|
|
2352
|
+
}
|
|
2353
|
+
],
|
|
2354
|
+
steps: [
|
|
2355
|
+
{
|
|
2356
|
+
id: "evaluate_purpose",
|
|
2357
|
+
description: "Single-call LLM step that produces the EvaluatorOutput JSON.",
|
|
2358
|
+
prompt: {
|
|
2359
|
+
type: "chat",
|
|
2360
|
+
messages: [
|
|
2361
|
+
{
|
|
2362
|
+
role: "system",
|
|
2363
|
+
source_path: "system.txt",
|
|
2364
|
+
sha256: "745b95b7d54dc845b99363c9d3360355381883c22a5f6a0f305d7349cae38a54"
|
|
2365
|
+
},
|
|
2366
|
+
{
|
|
2367
|
+
role: "user",
|
|
2368
|
+
source_path: "user.txt",
|
|
2369
|
+
sha256: "cd8e6347db1a55d104e34436f8f66e833bd6583645d4786a554aaefdd26479b2"
|
|
2370
|
+
}
|
|
2371
|
+
],
|
|
2372
|
+
placeholders: {
|
|
2373
|
+
text: {
|
|
2374
|
+
required: true,
|
|
2375
|
+
source: "input"
|
|
2376
|
+
},
|
|
2377
|
+
grade_level: {
|
|
2378
|
+
required: true,
|
|
2379
|
+
source: "input"
|
|
2380
|
+
},
|
|
2381
|
+
fk_score: {
|
|
2382
|
+
required: true,
|
|
2383
|
+
source: "preprocessing.fk_score"
|
|
2384
|
+
}
|
|
2385
|
+
}
|
|
2386
|
+
},
|
|
2387
|
+
model: {
|
|
2388
|
+
provider: "google",
|
|
2389
|
+
name: "gemini-3-flash-preview"
|
|
2390
|
+
},
|
|
2391
|
+
generation: {
|
|
2392
|
+
temperature: 0
|
|
2393
|
+
},
|
|
2394
|
+
parser: {
|
|
2395
|
+
kind: "structured_output"
|
|
2396
|
+
},
|
|
2397
|
+
output_binding: "formatted_output"
|
|
2398
|
+
}
|
|
2399
|
+
]};
|
|
2400
|
+
|
|
2401
|
+
// src/prompts/purpose/index.ts
|
|
2402
|
+
// The prompt step id is derived from the last dot-separated segment of the
// evaluator id in config.json, prefixed with "evaluate_".
var STEP_ID = "evaluate_" + config_default.evaluator.id.split(".").at(-1);
var _step = config_default.steps.find(({ id }) => id === STEP_ID);
// Fail at module load if the config does not declare the expected step.
if (_step === undefined) {
  throw new Error(`Step "${STEP_ID}" not found in purpose config.json`);
}
// Names of the {placeholder} tokens the prompt templates may contain.
var PLACEHOLDER_KEYS = Object.keys(_step.prompt.placeholders);
|
|
2406
|
+
/**
 * Substitute `{key}` tokens in a prompt template with caller-supplied values.
 *
 * Only keys listed in PLACEHOLDER_KEYS are eligible, and only when the key is
 * present in `inputs`; every other `{...}` token is left untouched.
 *
 * The template is scanned exactly once and values are inserted through a
 * replacer function, which gives two guarantees the per-key `replaceAll`
 * approach did not:
 *   - inserted values are never re-scanned, so input text that happens to
 *     contain e.g. "{fk_score}" is not expanded by a later substitution;
 *   - `$`-sequences in a value (`$$`, `$&`, `` $` ``, `$'`) are inserted
 *     literally instead of being interpreted as replacement patterns.
 *
 * @param {string} template - Prompt template containing `{key}` tokens.
 * @param {Object} inputs - Map of placeholder name to replacement value.
 * @returns {string} The template with the known placeholders filled in.
 */
function applyPlaceholders(template, inputs) {
  const knownKeys = new Set(PLACEHOLDER_KEYS);
  return template.replace(/\{([^{}]+)\}/g, (token, key) => {
    if (!knownKeys.has(key) || !(key in inputs)) {
      return token;
    }
    return String(inputs[key]);
  });
}
|
|
2412
|
+
/**
 * Build the Purpose evaluator's system prompt.
 *
 * @param {Object} inputs - Placeholder values to substitute into the template.
 * @returns {string} The system prompt template after placeholder substitution.
 */
function getSystemPrompt5(inputs) {
  const renderedSystemPrompt = applyPlaceholders(system_default4, inputs);
  return renderedSystemPrompt;
}
|
|
2415
|
+
/**
 * Build the Purpose evaluator's user prompt.
 *
 * @param {Object} inputs - Placeholder values to substitute into the template.
 * @returns {string} The user prompt template after placeholder substitution.
 */
function getUserPrompt5(inputs) {
  const renderedUserPrompt = applyPlaceholders(user_default4, inputs);
  return renderedUserPrompt;
}
|
|
2418
|
+
|
|
2419
|
+
// ../../evals/prompts/purpose/input_schema.json
|
|
2420
|
+
var input_schema_default = {
|
|
2421
|
+
properties: {
|
|
2422
|
+
grade_level: {
|
|
2423
|
+
minimum: 3,
|
|
2424
|
+
maximum: 12}
|
|
2425
|
+
}
|
|
2426
|
+
};
|
|
2427
|
+
|
|
2428
|
+
// src/evaluators/purpose.ts
|
|
2429
|
+
// Locate the LLM step for this evaluator: "evaluate_" + the last
// dot-separated segment of the evaluator id declared in config.json.
var STEP_ID2 = "evaluate_" + config_default.evaluator.id.split(".").at(-1);
var _step2 = config_default.steps.find(({ id }) => id === STEP_ID2);
// Fail at module load if the config does not declare the expected step.
if (_step2 === undefined) {
  throw new Error(`Step "${STEP_ID2}" not found in purpose config.json`);
}
var STEP = _step2;

// Supported grade range comes from the bundled input schema.
var { minimum: GRADE_MIN, maximum: GRADE_MAX } = input_schema_default.properties.grade_level;
// Every grade from GRADE_MIN to GRADE_MAX inclusive, as strings.
var SUPPORTED_GRADES = Array.from(
  { length: GRADE_MAX - GRADE_MIN + 1 },
  (_unused, offset) => String(GRADE_MIN + offset)
);

// Maps the schema's snake_case complexity levels to display labels.
var COMPLEXITY_SCORE_DISPLAY = {
  slightly_complex: "Slightly complex",
  moderately_complex: "Moderately complex",
  very_complex: "Very complex",
  exceedingly_complex: "Exceedingly complex",
  more_context_needed: "More context needed"
};
|
|
2443
|
+
/**
 * Evaluator for the Purpose dimension of qualitative text complexity.
 *
 * Identity (id, name, description), the model name and the sampling
 * temperature are all read from the bundled purpose config.json; the supported
 * grade range is read from the bundled input schema.
 */
var PurposeEvaluator = class _PurposeEvaluator extends BaseEvaluator {
  static metadata = {
    id: config_default.evaluator.id,
    name: config_default.evaluator.name,
    description: config_default.evaluator.description,
    supportedGrades: SUPPORTED_GRADES,
    defaultProviders: ["google" /* Google */]
  };
  // Sampling temperature for the LLM call, taken from the step's config.
  static TEMPERATURE = STEP.generation.temperature;
  /**
   * Compute the value bound to the `{fk_score}` prompt placeholder.
   *
   * Looks up the "fk_score" preprocessing step in config.json and hands its
   * `typescript` implementation descriptor to runPreprocessingStep.
   * NOTE(review): per config.json this is text-readability's
   * fleschKincaidGrade rounded to 2 decimals; confirm against
   * runPreprocessingStep, which is defined outside this block.
   *
   * @param text - The text to score
   * @returns Whatever runPreprocessingStep returns for the configured step
   * @throws {Error} If config.json has no "fk_score" preprocessing step
   */
  static computeFkScore(text) {
    const fkStep = config_default.preprocessing.find((p) => p.id === "fk_score");
    if (!fkStep) throw new Error("fk_score preprocessing step not found in purpose config.json");
    return runPreprocessingStep(text, fkStep.implementation.typescript);
  }
  provider;
  constructor(config) {
    super(config);
    // Google provider using the model named in config.json.
    this.provider = this.createConfiguredProvider(
      "google" /* Google */,
      STEP.model.name,
      config.googleApiKey
    );
  }
  /**
   * Evaluate purpose complexity for a given text and grade level
   *
   * @param text - The text to evaluate
   * @param grade - The target grade level (3-12)
   * @returns Evaluation result with complexity score and detailed analysis
   * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
   * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
   * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
   */
  async evaluate(text, grade) {
    this.logger.info("Starting Purpose evaluation", {
      evaluator: _PurposeEvaluator.metadata.id,
      operation: "evaluate",
      grade,
      textLength: text.length
    });
    const startTime = Date.now();
    // Filled in once the LLM stage completes; also reported on failure.
    const stageDetails = [];
    try {
      this.validateText(text);
      const gradeNum = this.parseAndValidateGrade(grade);
      const fkScore = _PurposeEvaluator.computeFkScore(text);
      // Placeholder values are passed to the prompt templates as strings.
      const inputs = {
        text,
        grade_level: String(gradeNum),
        fk_score: String(fkScore)
      };
      const response = await this.callLLM(inputs);
      const latencyMs = Date.now() - startTime;
      const tokenUsage = {
        input_tokens: response.usage.inputTokens,
        output_tokens: response.usage.outputTokens
      };
      stageDetails.push({
        stage: STEP.id,
        provider: this.provider.label,
        latency_ms: response.latencyMs,
        token_usage: tokenUsage
      });
      const result = {
        // Convert the schema's snake_case level to its display label.
        score: COMPLEXITY_SCORE_DISPLAY[response.data.complexity_score],
        reasoning: response.data.reasoning,
        metadata: {
          model: this.provider.label,
          processingTimeMs: latencyMs,
          inputTokens: tokenUsage.input_tokens,
          outputTokens: tokenUsage.output_tokens
        },
        _internal: response.data
      };
      // Not awaited; a telemetry failure must not fail the evaluation.
      this.sendTelemetry({
        status: "success",
        latencyMs,
        textLength: text.length,
        grade: String(gradeNum),
        provider: this.provider.label,
        tokenUsage,
        metadata: { stage_details: stageDetails },
        inputText: text
      }).catch(() => void 0);
      this.logger.info("Purpose evaluation completed successfully", {
        evaluator: _PurposeEvaluator.metadata.id,
        operation: "evaluate",
        grade: gradeNum,
        score: result.score,
        processingTimeMs: latencyMs
      });
      return result;
    } catch (error) {
      const latencyMs = Date.now() - startTime;
      this.logger.error("Purpose evaluation failed", {
        evaluator: _PurposeEvaluator.metadata.id,
        operation: "evaluate",
        grade,
        error: error instanceof Error ? error : void 0,
        processingTimeMs: latencyMs
      });
      // Report token usage only for stages that completed before the failure.
      const tokenUsage = stageDetails.length > 0 ? {
        input_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.input_tokens ?? 0), 0),
        output_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.output_tokens ?? 0), 0)
      } : void 0;
      this.sendTelemetry({
        status: "error",
        latencyMs,
        textLength: text.length,
        grade: String(grade),
        provider: this.provider.label,
        tokenUsage,
        errorCode: error instanceof Error ? error.name : "UnknownError",
        metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
        inputText: text
      }).catch(() => void 0);
      // Validation failures propagate unchanged; anything else is wrapped.
      if (error instanceof ValidationError) throw error;
      throw wrapProviderError(error, "Purpose evaluation failed");
    }
  }
  /**
   * Parse a grade value and check it against the supported range.
   *
   * The value is coerced with String() before trimming, so a numeric grade
   * (e.g. 5) is handled like "5" instead of throwing a TypeError on `.trim()`,
   * and null/undefined produce a ValidationError rather than a wrapped
   * provider error.
   *
   * @param grade - Grade as a string (or number), surrounding whitespace allowed
   * @returns The grade as an integer between GRADE_MIN and GRADE_MAX
   * @throws {ValidationError} If the value is not an integer in range
   */
  parseAndValidateGrade(grade) {
    const num = Number(String(grade).trim());
    if (!Number.isInteger(num) || num < GRADE_MIN || num > GRADE_MAX) {
      throw new ValidationError(
        `Invalid grade "${grade}". Purpose evaluator supports integer grades ${GRADE_MIN}\u2013${GRADE_MAX}.`
      );
    }
    return num;
  }
  /**
   * Run the single structured-output LLM call for this evaluator.
   *
   * @param inputs - Placeholder values (text, grade_level, fk_score) for the prompts
   * @returns The provider response reduced to { data, usage, latencyMs }
   */
  async callLLM(inputs) {
    const response = await this.provider.generateStructured({
      messages: [
        { role: "system", content: getSystemPrompt5(inputs) },
        { role: "user", content: getUserPrompt5(inputs) }
      ],
      schema: PurposeOutputSchema,
      temperature: _PurposeEvaluator.TEMPERATURE
    });
    return { data: response.data, usage: response.usage, latencyMs: response.latencyMs };
  }
};
|
|
2191
2584
|
|
|
2192
2585
|
// src/batch/evaluator.ts
|
|
2193
2586
|
var EVALUATOR_MAP = /* @__PURE__ */ new Map([
|
|
@@ -2195,19 +2588,21 @@ var EVALUATOR_MAP = /* @__PURE__ */ new Map([
|
|
|
2195
2588
|
[SmkEvaluator.metadata.id, SmkEvaluator],
|
|
2196
2589
|
[VocabularyEvaluator.metadata.id, VocabularyEvaluator],
|
|
2197
2590
|
[SentenceStructureEvaluator.metadata.id, SentenceStructureEvaluator],
|
|
2198
|
-
[ConventionalityEvaluator.metadata.id, ConventionalityEvaluator]
|
|
2591
|
+
[ConventionalityEvaluator.metadata.id, ConventionalityEvaluator],
|
|
2592
|
+
[PurposeEvaluator.metadata.id, PurposeEvaluator]
|
|
2199
2593
|
]);
|
|
2200
2594
|
var EVALUATOR_GROUPS = [
|
|
2201
2595
|
{
|
|
2202
2596
|
id: "text-complexity",
|
|
2203
2597
|
name: "Text Complexity Analysis",
|
|
2204
|
-
description: "Evaluates
|
|
2598
|
+
description: "Evaluates all dimensions of the Qualitative Text Complexity rubric",
|
|
2205
2599
|
evaluatorIds: [
|
|
2206
2600
|
GradeLevelAppropriatenessEvaluator.metadata.id,
|
|
2207
2601
|
SmkEvaluator.metadata.id,
|
|
2208
2602
|
VocabularyEvaluator.metadata.id,
|
|
2209
2603
|
SentenceStructureEvaluator.metadata.id,
|
|
2210
|
-
ConventionalityEvaluator.metadata.id
|
|
2604
|
+
ConventionalityEvaluator.metadata.id,
|
|
2605
|
+
PurposeEvaluator.metadata.id
|
|
2211
2606
|
],
|
|
2212
2607
|
requiresGoogleKey: true,
|
|
2213
2608
|
requiresOpenAIKey: true,
|
|
@@ -2228,6 +2623,7 @@ var BatchEvaluator = class {
|
|
|
2228
2623
|
concurrency: 3,
|
|
2229
2624
|
maxRetries: 2,
|
|
2230
2625
|
telemetry: false,
|
|
2626
|
+
bypassRowLimit: false,
|
|
2231
2627
|
...config
|
|
2232
2628
|
};
|
|
2233
2629
|
this.limit = pLimit(this.config.concurrency);
|
|
@@ -2381,9 +2777,9 @@ var BatchEvaluator = class {
|
|
|
2381
2777
|
`Unknown evaluator group: "${groupId}". Available: ${EVALUATOR_GROUPS.map((g) => g.id).join(", ")}`
|
|
2382
2778
|
);
|
|
2383
2779
|
}
|
|
2384
|
-
if (inputs.length > group.maxInputRows) {
|
|
2780
|
+
if (!this.config.bypassRowLimit && inputs.length > group.maxInputRows) {
|
|
2385
2781
|
throw new Error(
|
|
2386
|
-
`Input exceeds limit for "${group.id}": ${inputs.length} rows (max ${group.maxInputRows}). Split into smaller batches.`
|
|
2782
|
+
`Input exceeds limit for "${group.id}": ${inputs.length} rows (max ${group.maxInputRows}). Split into smaller batches, or pass { bypassRowLimit: true } in BatchConfig to bypass (use --bypass-row-limit on the CLI).`
|
|
2387
2783
|
);
|
|
2388
2784
|
}
|
|
2389
2785
|
this.isCancelled = false;
|
|
@@ -3298,6 +3694,8 @@ var COMPLEXITY_SCORE_MAP = {
|
|
|
3298
3694
|
"moderately complex": 2,
|
|
3299
3695
|
"very complex": 3,
|
|
3300
3696
|
"exceedingly complex": 4
|
|
3697
|
+
// 'more context needed' has no numeric equivalent — rows with this score appear as N/A
|
|
3698
|
+
// in individual results and are excluded from aggregate stats, same as failed evaluations.
|
|
3301
3699
|
};
|
|
3302
3700
|
function evaluatorDisplayName(id) {
|
|
3303
3701
|
return id.split("-").map((w) => w.charAt(0).toUpperCase() + w.slice(1)).join(" ");
|
|
@@ -3352,7 +3750,8 @@ function groupResultsByRow(results) {
|
|
|
3352
3750
|
return grouped;
|
|
3353
3751
|
}
|
|
3354
3752
|
/**
 * Derive a column prefix from an evaluator id.
 *
 * Keeps only the part after the last "." (the whole id when there is no dot)
 * and turns every "-" into "_".
 *
 * @param {string} evaluatorId - Evaluator id, e.g. "literacy.gla.purpose".
 * @returns {string} The prefix, e.g. "purpose".
 */
function formatEvaluatorPrefix(evaluatorId) {
  const lastDot = evaluatorId.lastIndexOf(".");
  const slug = lastDot === -1 ? evaluatorId : evaluatorId.slice(lastDot + 1);
  return slug.split("-").join("_");
}
|
|
3357
3756
|
function escapeCSV(field) {
|
|
3358
3757
|
if (field.includes(",") || field.includes('"') || field.includes("\n")) {
|
|
@@ -3713,6 +4112,8 @@ function parseArgs() {
|
|
|
3713
4112
|
if (!isNaN(v) && v >= 0) result.maxRetries = v;
|
|
3714
4113
|
} else if (args[i] === "--no-telemetry") {
|
|
3715
4114
|
result.noTelemetry = true;
|
|
4115
|
+
} else if (args[i] === "--bypass-row-limit") {
|
|
4116
|
+
result.bypassRowLimit = true;
|
|
3716
4117
|
}
|
|
3717
4118
|
}
|
|
3718
4119
|
return result;
|
|
@@ -3746,16 +4147,22 @@ async function main() {
|
|
|
3746
4147
|
`);
|
|
3747
4148
|
const group = getAvailableGroups()[0];
|
|
3748
4149
|
console.log(`\u2713 Evaluator group: ${group.name}`);
|
|
3749
|
-
console.log(` ${group.description}
|
|
3750
|
-
console.log(` Row limit: ${group.maxInputRows}
|
|
4150
|
+
console.log(` ${group.description}
|
|
3751
4151
|
`);
|
|
3752
4152
|
if (inputs.length > group.maxInputRows) {
|
|
3753
|
-
|
|
4153
|
+
if (cliArgs.bypassRowLimit) {
|
|
4154
|
+
console.warn(`\u26A0\uFE0F Row limit bypassed: ${inputs.length} rows (default max ${group.maxInputRows}).`);
|
|
4155
|
+
console.warn(` Expect longer runtime and possible provider throttling.
|
|
3754
4156
|
`);
|
|
3755
|
-
|
|
3756
|
-
|
|
3757
|
-
|
|
3758
|
-
|
|
4157
|
+
} else {
|
|
4158
|
+
console.error(`\u274C Too many rows: ${inputs.length} (max ${group.maxInputRows} for this group)
|
|
4159
|
+
`);
|
|
4160
|
+
console.log("Suggestions:");
|
|
4161
|
+
console.log(` \u2022 Trim the CSV to ${group.maxInputRows} rows`);
|
|
4162
|
+
console.log(" \u2022 Split into multiple smaller batches");
|
|
4163
|
+
console.log(" \u2022 Re-run with --bypass-row-limit to skip this check (use with caution)\n");
|
|
4164
|
+
process.exit(1);
|
|
4165
|
+
}
|
|
3759
4166
|
}
|
|
3760
4167
|
let googleApiKey;
|
|
3761
4168
|
let openaiApiKey;
|
|
@@ -3832,7 +4239,7 @@ async function main() {
|
|
|
3832
4239
|
const totalTasks = inputs.length * group.evaluatorIds.length;
|
|
3833
4240
|
console.log(`
|
|
3834
4241
|
\u{1F4DD} Summary:`);
|
|
3835
|
-
console.log(` Input rows: ${inputs.length}`);
|
|
4242
|
+
console.log(` Input rows: ${inputs.length}${cliArgs.bypassRowLimit ? " (row limit bypassed)" : ""}`);
|
|
3836
4243
|
console.log(` Evaluators: ${group.evaluatorIds.length}`);
|
|
3837
4244
|
console.log(` Total tasks: ${totalTasks}`);
|
|
3838
4245
|
console.log(` Concurrency: ${cliArgs.concurrency ?? 3}`);
|
|
@@ -3857,7 +4264,8 @@ async function main() {
|
|
|
3857
4264
|
openaiApiKey,
|
|
3858
4265
|
concurrency: cliArgs.concurrency ?? 3,
|
|
3859
4266
|
maxRetries: cliArgs.maxRetries ?? 2,
|
|
3860
|
-
telemetry: !cliArgs.noTelemetry
|
|
4267
|
+
telemetry: !cliArgs.noTelemetry,
|
|
4268
|
+
bypassRowLimit: cliArgs.bypassRowLimit ?? false
|
|
3861
4269
|
});
|
|
3862
4270
|
let isShuttingDown = false;
|
|
3863
4271
|
const handleShutdown = () => {
|