@learning-commons/evaluators 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/README.md +188 -45
- package/dist/{base-Ced9oKKa.d.cts → base-DKcAYXfb.d.cts} +142 -9
- package/dist/{base-Ced9oKKa.d.ts → base-DKcAYXfb.d.ts} +142 -9
- package/dist/batch/cli.js +635 -227
- package/dist/batch/cli.js.map +1 -1
- package/dist/batch/index.cjs +618 -218
- package/dist/batch/index.cjs.map +1 -1
- package/dist/batch/index.d.cts +3 -1
- package/dist/batch/index.d.ts +3 -1
- package/dist/batch/index.js +617 -218
- package/dist/batch/index.js.map +1 -1
- package/dist/index.cjs +626 -217
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +155 -86
- package/dist/index.d.ts +155 -86
- package/dist/index.js +622 -218
- package/dist/index.js.map +1 -1
- package/package.json +13 -4
- package/src/batch/README.md +14 -1
package/dist/index.js
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
+
import { generateText, Output } from 'ai';
|
|
2
3
|
import { randomUUID } from 'crypto';
|
|
3
4
|
import { readFileSync, mkdirSync, writeFileSync } from 'fs';
|
|
4
5
|
import { dirname, join } from 'path';
|
|
5
6
|
import { homedir } from 'os';
|
|
6
7
|
import { fileURLToPath } from 'url';
|
|
7
|
-
import { generateText, Output } from 'ai';
|
|
8
8
|
import nlp from 'compromise';
|
|
9
9
|
import { syllable } from 'syllable';
|
|
10
|
+
import textReadability from 'text-readability';
|
|
10
11
|
import pLimit from 'p-limit';
|
|
11
12
|
|
|
12
13
|
// src/schemas/outputs.ts
|
|
@@ -25,6 +26,7 @@ var GradeLevelAppropriatenessSchema = z.object({
|
|
|
25
26
|
alternative_grade: GradeBand.describe("An alternative grade level for the text"),
|
|
26
27
|
scaffolding_needed: z.string().describe("Scaffolding needed for the text to be appropriate for the alternative grade")
|
|
27
28
|
});
|
|
29
|
+
// Structured-output schema for the Purpose evaluation: a complexity level,
// a summary of the reasoning, and nested instructional details.
// Every object level is strict, so unknown keys are rejected.
var PurposeOutputSchema = z.object({
  complexity_score: z
    .enum([
      "slightly_complex",
      "moderately_complex",
      "very_complex",
      "exceedingly_complex",
      "more_context_needed"
    ])
    .describe("The Purpose complexity level for the target grade."),
  reasoning: z
    .string()
    .describe("A high-level summary of why the text is at this complexity level for the target grade."),
  details: z
    .object({
      detailed_summary: z
        .array(
          z
            .object({
              factor: z.string().describe("The specific text complexity factor identified."),
              description: z.string().describe("How this factor manifests in the text."),
              effect_on_complexity_dimension: z
                .string()
                .describe("How this factor affects the reader's ability to understand the text's specific complexity dimension.")
            })
            .strict()
        )
        .describe("Individual complexity factors with descriptions and their effects."),
      adjustment_and_scaffolding: z
        .array(
          z
            .object({
              scaffolding_need: z.string().describe("The complexity factor that requires scaffolding."),
              suggestion: z
                .string()
                .describe("A specific instructional strategy to support students with this factor.")
            })
            .strict()
        )
        .describe("Scaffolding strategies to make the text accessible at the target grade."),
      recommended_use_cases: z
        .array(
          z
            .object({
              opportunity: z.string().describe("An instructional opportunity related to the text."),
              suggestion: z
                .string()
                .describe("A specific way to leverage this text for that instructional purpose.")
            })
            .strict()
        )
        .describe("Additional instructional opportunities for using this text.")
    })
    .strict()
    .describe("Practical instructional details including scaffolding strategies and recommended use cases.")
}).strict();
|
|
28
30
|
|
|
29
31
|
// src/errors.ts
|
|
30
32
|
var EvaluatorError = class extends Error {
|
|
@@ -85,8 +87,9 @@ var TimeoutError = class extends APIError {
|
|
|
85
87
|
function parseProviderError(error) {
|
|
86
88
|
if (error instanceof Error) {
|
|
87
89
|
const message = error.message;
|
|
90
|
+
const err = error;
|
|
88
91
|
const statusMatch = message.match(/\b(4\d{2}|5\d{2})\b/);
|
|
89
|
-
const statusCode = statusMatch ? parseInt(statusMatch[1]) : void 0;
|
|
92
|
+
const statusCode = err.statusCode ?? err.status ?? (statusMatch ? parseInt(statusMatch[1]) : void 0);
|
|
90
93
|
return {
|
|
91
94
|
message,
|
|
92
95
|
statusCode,
|
|
@@ -99,6 +102,11 @@ function parseProviderError(error) {
|
|
|
99
102
|
}
|
|
100
103
|
function wrapProviderError(error, defaultMessage = "API request failed") {
|
|
101
104
|
const { message, statusCode, code } = parseProviderError(error);
|
|
105
|
+
if (statusCode === 404 || statusCode === 400 && /\bmodel\b.*(not found|does not exist|invalid)/i.test(message)) {
|
|
106
|
+
return new ConfigurationError(
|
|
107
|
+
`Model not found or invalid: ${message}. Check the model ID passed to the provider.`
|
|
108
|
+
);
|
|
109
|
+
}
|
|
102
110
|
if (statusCode === 401 || statusCode === 403) {
|
|
103
111
|
return new AuthenticationError(
|
|
104
112
|
message.includes("API key") ? message : "Invalid API key",
|
|
@@ -181,6 +189,119 @@ function createLogger(customLogger, level = 2 /* WARN */) {
|
|
|
181
189
|
}
|
|
182
190
|
return new ConsoleLogger(level);
|
|
183
191
|
}
|
|
192
|
+
|
|
193
|
+
// src/providers/base.ts
|
|
194
|
+
/**
 * String identifiers for the provider types handled by the provider layer
 * ("google", "openai", "anthropic", and "custom").
 * Each key maps to the identical string value.
 *
 * Frozen so this shared lookup table cannot be altered at runtime.
 */
var Providers = Object.freeze({
  google: "google",
  openai: "openai",
  anthropic: "anthropic",
  custom: "custom"
});
|
|
200
|
+
/**
 * LLM provider that delegates to the Vercel AI SDK (`generateText` / `Output`
 * from "ai"). The concrete model is created lazily from a dynamically imported
 * `@ai-sdk/*` adapter, so consumers only need the adapter packages they use.
 */
var VercelAIProvider = class {
  /** Human-readable identifier in the form `<type>:<model>`. */
  label;
  /** Model ID passed to the adapter; always a non-blank string after construction. */
  model;
  /**
   * @param config - Settings read by this class: `type`, `model`, and
   *   optionally `apiKey`, `maxRetries` and `temperature`.
   * @throws {Error} If `config.type` is "custom".
   * @throws {Error} If `config.model` is missing, not a string, or blank.
   */
  constructor(config) {
    this.config = config;
    if (config.type === "custom") {
      throw new Error(
        "VercelAIProvider does not support custom type. Use config.customProvider directly."
      );
    }
    // typeof guard: a non-string model would otherwise crash inside .trim()
    // with a TypeError instead of the explanatory error below.
    if (typeof config.model !== "string" || config.model.trim() === "") {
      throw new Error(
        `model is required for VercelAIProvider (type: "${config.type}"). No default is assumed.`
      );
    }
    this.model = config.model;
    this.label = `${config.type}:${config.model}`;
  }
  /**
   * Generate structured output using the AI SDK's generateText with an object output.
   *
   * @param request - Reads `messages`, `schema`, and optionally `temperature`
   *   and `maxTokens`.
   * @returns `{ data, model, usage: { inputTokens, outputTokens }, latencyMs }`.
   */
  async generateStructured(request) {
    const model = await this.getModel();
    const startTime = Date.now();
    const { output, usage } = await generateText({
      model,
      messages: request.messages,
      output: Output.object({ schema: request.schema }),
      // Same fallback chain as generateText() below, so a provider-level
      // temperature applies to both call styles.
      temperature: request.temperature ?? this.config.temperature ?? 0,
      maxRetries: this.config.maxRetries ?? 0,
      // AI SDK 5+ renamed `maxTokens` to `maxOutputTokens`; under the old
      // name the limit is silently ignored.
      ...(request.maxTokens !== void 0 ? { maxOutputTokens: request.maxTokens } : {})
    });
    return {
      data: output,
      model: this.model,
      usage: {
        inputTokens: usage.inputTokens || 0,
        outputTokens: usage.outputTokens || 0
      },
      latencyMs: Date.now() - startTime
    };
  }
  /**
   * Generate plain text using the AI SDK's generateText.
   *
   * @param messages - Chat messages forwarded to the SDK.
   * @param temperature - Optional; falls back to `config.temperature`, then 0.
   * @returns `{ text, usage: { inputTokens, outputTokens }, latencyMs }`.
   */
  async generateText(messages, temperature) {
    const model = await this.getModel();
    const startTime = Date.now();
    const { text, usage } = await generateText({
      model,
      messages,
      temperature: temperature ?? this.config.temperature ?? 0,
      maxRetries: this.config.maxRetries ?? 0
    });
    return {
      text,
      usage: {
        inputTokens: usage.inputTokens || 0,
        outputTokens: usage.outputTokens || 0
      },
      latencyMs: Date.now() - startTime
    };
  }
  /**
   * Get the configured language model.
   * Uses dynamic imports so consumers only need to install the provider packages they use.
   *
   * @throws {Error} If the adapter package cannot be imported (the original
   *   import failure is attached as `cause`).
   * @throws {Error} If `config.type` is not "openai", "anthropic" or "google".
   */
  async getModel() {
    const { apiKey, type } = this.config;
    // Only pass apiKey when one was supplied; otherwise the adapter gets no options.
    const settings = apiKey ? { apiKey } : {};
    // Builds the rejection handler for a failed adapter import. The original
    // error is kept as `cause` so the underlying failure stays diagnosable.
    const adapterMissing = (providerName, packageName) => (cause) => {
      throw new Error(
        `To use the ${providerName} provider, install its adapter: npm install ${packageName}`,
        { cause }
      );
    };
    switch (type) {
      case "openai": {
        const { createOpenAI } = await import("@ai-sdk/openai").catch(
          adapterMissing("OpenAI", "@ai-sdk/openai")
        );
        return createOpenAI(settings)(this.model);
      }
      case "anthropic": {
        const { createAnthropic } = await import("@ai-sdk/anthropic").catch(
          adapterMissing("Anthropic", "@ai-sdk/anthropic")
        );
        return createAnthropic(settings)(this.model);
      }
      case "google": {
        const { createGoogleGenerativeAI } = await import("@ai-sdk/google").catch(
          adapterMissing("Google", "@ai-sdk/google")
        );
        return createGoogleGenerativeAI(settings)(this.model);
      }
      default:
        throw new Error(`Unsupported provider type: ${type}`);
    }
  }
};
|
|
299
|
+
/**
 * Resolve the provider instance for a config.
 *
 * A "custom" config that carries a `customProvider` returns that object as-is;
 * every other config is handed to `VercelAIProvider`.
 */
function createProvider(config) {
  const suppliedProvider = config.type === "custom" ? config.customProvider : void 0;
  return suppliedProvider ? suppliedProvider : new VercelAIProvider(config);
}
|
|
184
305
|
var SentenceAnalysisSchema = z.object({
|
|
185
306
|
reasoning: z.string().describe("Step-by-step reasoning for the analysis"),
|
|
186
307
|
// Foundational
|
|
@@ -340,6 +461,12 @@ var VALIDATION_LIMITS = {
|
|
|
340
461
|
/** Maximum text length in characters (100K chars ≈ 25K tokens) */
|
|
341
462
|
MAX_TEXT_LENGTH: 1e5
|
|
342
463
|
};
|
|
464
|
+
/**
 * Enum of the built-in LLM providers, mapping member names to the string IDs
 * used in configuration.
 *
 * Frozen because `Object.values(Provider)` is used as the list of valid
 * `modelOverride.provider` values; a runtime mutation would silently change
 * what passes validation.
 */
var Provider = /* @__PURE__ */ Object.freeze({
  OpenAI: "openai",
  Google: "google",
  Anthropic: "anthropic"
});
|
|
343
470
|
var BaseEvaluator = class {
|
|
344
471
|
telemetryClient;
|
|
345
472
|
logger;
|
|
@@ -357,21 +484,35 @@ var BaseEvaluator = class {
|
|
|
357
484
|
* name: 'My Evaluator',
|
|
358
485
|
* description: 'Does something useful',
|
|
359
486
|
* supportedGrades: ['3', '4', '5'],
|
|
360
|
-
*
|
|
361
|
-
* requiresOpenAIKey: false,
|
|
487
|
+
* defaultProviders: [Provider.Google],
|
|
362
488
|
* };
|
|
363
489
|
* }
|
|
364
490
|
* ```
|
|
365
491
|
*/
|
|
366
492
|
static metadata;
|
|
493
|
+
/**
|
|
494
|
+
* @throws {ConfigurationError} If the subclass has not defined static metadata
|
|
495
|
+
* @throws {ConfigurationError} If modelOverride has an invalid provider or empty model
|
|
496
|
+
* @throws {ConfigurationError} If a required API key is missing
|
|
497
|
+
*/
|
|
367
498
|
constructor(config) {
|
|
368
499
|
this.logger = createLogger(config.logger, config.logLevel ?? 2 /* WARN */);
|
|
500
|
+
this.validateModelOverride(config);
|
|
369
501
|
this.validateApiKeys(config);
|
|
370
502
|
const telemetryConfig = this.normalizeTelemetryConfig(config.telemetry);
|
|
371
503
|
this.config = {
|
|
372
504
|
maxRetries: config.maxRetries ?? 2,
|
|
373
|
-
telemetry: telemetryConfig
|
|
505
|
+
telemetry: telemetryConfig,
|
|
506
|
+
modelOverride: config.modelOverride,
|
|
507
|
+
googleApiKey: config.googleApiKey,
|
|
508
|
+
openaiApiKey: config.openaiApiKey,
|
|
509
|
+
anthropicApiKey: config.anthropicApiKey
|
|
374
510
|
};
|
|
511
|
+
if (config.modelOverride) {
|
|
512
|
+
this.logger.warn(
|
|
513
|
+
`modelOverride is active: using ${config.modelOverride.provider}:${config.modelOverride.model} instead of the default model. Evaluation quality may differ from recommended defaults.`
|
|
514
|
+
);
|
|
515
|
+
}
|
|
375
516
|
if (this.config.telemetry.enabled) {
|
|
376
517
|
this.telemetryClient = new TelemetryClient({
|
|
377
518
|
endpoint: "https://api.learningcommons.org/evaluators-telemetry/v1/events",
|
|
@@ -396,21 +537,62 @@ var BaseEvaluator = class {
|
|
|
396
537
|
return meta;
|
|
397
538
|
}
|
|
398
539
|
/**
|
|
399
|
-
* Validate
|
|
400
|
-
*
|
|
540
|
+
* Validate modelOverride shape: provider must be a known Provider value and
|
|
541
|
+
* model must be a non-empty string.
|
|
542
|
+
* @throws {ConfigurationError} If the override is malformed
|
|
401
543
|
*/
|
|
402
|
-
|
|
403
|
-
if (
|
|
544
|
+
validateModelOverride(config) {
|
|
545
|
+
if (!config.modelOverride) return;
|
|
546
|
+
const validProviders = Object.values(Provider);
|
|
547
|
+
if (!validProviders.includes(config.modelOverride.provider)) {
|
|
404
548
|
throw new ConfigurationError(
|
|
405
|
-
`
|
|
549
|
+
`Invalid provider "${config.modelOverride.provider}" in modelOverride. Valid providers are: ${validProviders.join(", ")}.`
|
|
406
550
|
);
|
|
407
551
|
}
|
|
408
|
-
if (
|
|
552
|
+
if (!config.modelOverride.model || config.modelOverride.model.trim() === "") {
|
|
409
553
|
throw new ConfigurationError(
|
|
410
|
-
`
|
|
554
|
+
`modelOverride.model is required. Specify the model ID for provider "${config.modelOverride.provider}".`
|
|
411
555
|
);
|
|
412
556
|
}
|
|
413
557
|
}
|
|
558
|
+
/**
|
|
559
|
+
* Validate that the required API key is present.
|
|
560
|
+
* When modelOverride is set, checks the override provider's key.
|
|
561
|
+
* Otherwise checks the keys required by the evaluator's default providers.
|
|
562
|
+
* @throws {ConfigurationError} If a required key is missing
|
|
563
|
+
*/
|
|
564
|
+
validateApiKeys(config) {
|
|
565
|
+
const keyFor = {
|
|
566
|
+
["openai" /* OpenAI */]: config.openaiApiKey?.trim() || void 0,
|
|
567
|
+
["google" /* Google */]: config.googleApiKey?.trim() || void 0,
|
|
568
|
+
["anthropic" /* Anthropic */]: config.anthropicApiKey?.trim() || void 0
|
|
569
|
+
};
|
|
570
|
+
const humanName = {
|
|
571
|
+
["openai" /* OpenAI */]: "OpenAI API key",
|
|
572
|
+
["google" /* Google */]: "Google API key",
|
|
573
|
+
["anthropic" /* Anthropic */]: "Anthropic API key"
|
|
574
|
+
};
|
|
575
|
+
const configKey = {
|
|
576
|
+
["openai" /* OpenAI */]: "openaiApiKey",
|
|
577
|
+
["google" /* Google */]: "googleApiKey",
|
|
578
|
+
["anthropic" /* Anthropic */]: "anthropicApiKey"
|
|
579
|
+
};
|
|
580
|
+
if (config.modelOverride) {
|
|
581
|
+
if (!keyFor[config.modelOverride.provider]) {
|
|
582
|
+
throw new ConfigurationError(
|
|
583
|
+
`${humanName[config.modelOverride.provider]} is required when using modelOverride with provider "${config.modelOverride.provider}". Pass ${configKey[config.modelOverride.provider]} in config.`
|
|
584
|
+
);
|
|
585
|
+
}
|
|
586
|
+
return;
|
|
587
|
+
}
|
|
588
|
+
for (const provider of this.metadata.defaultProviders) {
|
|
589
|
+
if (!keyFor[provider]) {
|
|
590
|
+
throw new ConfigurationError(
|
|
591
|
+
`${humanName[provider]} is required for ${this.metadata.name} evaluator. Pass ${configKey[provider]} in config.`
|
|
592
|
+
);
|
|
593
|
+
}
|
|
594
|
+
}
|
|
595
|
+
}
|
|
414
596
|
/**
|
|
415
597
|
* Normalize telemetry config to standard format
|
|
416
598
|
*/
|
|
@@ -491,6 +673,33 @@ var BaseEvaluator = class {
|
|
|
491
673
|
);
|
|
492
674
|
}
|
|
493
675
|
}
|
|
676
|
+
/**
|
|
677
|
+
* Create an LLM provider, honouring modelOverride if set.
|
|
678
|
+
* When override is active, the key for the override provider is resolved
|
|
679
|
+
* from the matching top-level config field (e.g. anthropicApiKey for Anthropic).
|
|
680
|
+
*/
|
|
681
|
+
createConfiguredProvider(defaultType, defaultModel, defaultApiKey) {
|
|
682
|
+
const override = this.config.modelOverride;
|
|
683
|
+
if (override) {
|
|
684
|
+
const apiKeyFor = {
|
|
685
|
+
["openai" /* OpenAI */]: this.config.openaiApiKey,
|
|
686
|
+
["google" /* Google */]: this.config.googleApiKey,
|
|
687
|
+
["anthropic" /* Anthropic */]: this.config.anthropicApiKey
|
|
688
|
+
};
|
|
689
|
+
return createProvider({
|
|
690
|
+
type: override.provider,
|
|
691
|
+
model: override.model,
|
|
692
|
+
apiKey: apiKeyFor[override.provider],
|
|
693
|
+
maxRetries: this.config.maxRetries
|
|
694
|
+
});
|
|
695
|
+
}
|
|
696
|
+
return createProvider({
|
|
697
|
+
type: defaultType,
|
|
698
|
+
model: defaultModel,
|
|
699
|
+
apiKey: defaultApiKey,
|
|
700
|
+
maxRetries: this.config.maxRetries
|
|
701
|
+
});
|
|
702
|
+
}
|
|
494
703
|
/**
|
|
495
704
|
* Send telemetry event to analytics service
|
|
496
705
|
* Common helper for all evaluators
|
|
@@ -511,123 +720,12 @@ var BaseEvaluator = class {
|
|
|
511
720
|
provider: params.provider,
|
|
512
721
|
token_usage: params.tokenUsage,
|
|
513
722
|
metadata: params.metadata,
|
|
723
|
+
model_override: this.config.modelOverride ? true : void 0,
|
|
514
724
|
// Include input text only if recording is enabled
|
|
515
725
|
input_text: this.config.telemetry.recordInputs ? params.inputText : void 0
|
|
516
726
|
});
|
|
517
727
|
}
|
|
518
728
|
};
|
|
519
|
-
var DEFAULT_MODELS = {
|
|
520
|
-
openai: "gpt-4o",
|
|
521
|
-
anthropic: "claude-sonnet-4-5-20250929",
|
|
522
|
-
google: "gemini-2.5-pro"
|
|
523
|
-
};
|
|
524
|
-
var VercelAIProvider = class {
|
|
525
|
-
constructor(config) {
|
|
526
|
-
this.config = config;
|
|
527
|
-
if (config.type === "custom") {
|
|
528
|
-
throw new Error(
|
|
529
|
-
"VercelAIProvider does not support custom type. Use config.customProvider directly."
|
|
530
|
-
);
|
|
531
|
-
}
|
|
532
|
-
}
|
|
533
|
-
/**
|
|
534
|
-
* Generate structured output using Vercel AI SDK's generateText with output
|
|
535
|
-
*/
|
|
536
|
-
async generateStructured(request) {
|
|
537
|
-
const model = await this.getModel(request.model);
|
|
538
|
-
const startTime = Date.now();
|
|
539
|
-
const { output, usage } = await generateText({
|
|
540
|
-
model,
|
|
541
|
-
messages: request.messages,
|
|
542
|
-
output: Output.object({ schema: request.schema }),
|
|
543
|
-
temperature: request.temperature ?? 0,
|
|
544
|
-
maxRetries: this.config.maxRetries ?? 0,
|
|
545
|
-
...request.maxTokens !== void 0 ? { maxTokens: request.maxTokens } : {}
|
|
546
|
-
});
|
|
547
|
-
return {
|
|
548
|
-
data: output,
|
|
549
|
-
model: request.model || this.getDefaultModel(),
|
|
550
|
-
usage: {
|
|
551
|
-
inputTokens: usage.inputTokens || 0,
|
|
552
|
-
outputTokens: usage.outputTokens || 0
|
|
553
|
-
},
|
|
554
|
-
latencyMs: Date.now() - startTime
|
|
555
|
-
};
|
|
556
|
-
}
|
|
557
|
-
/**
|
|
558
|
-
* Generate plain text using Vercel AI SDK's generateText
|
|
559
|
-
*/
|
|
560
|
-
async generateText(messages, temperature) {
|
|
561
|
-
const model = await this.getModel();
|
|
562
|
-
const startTime = Date.now();
|
|
563
|
-
const { text, usage } = await generateText({
|
|
564
|
-
model,
|
|
565
|
-
messages,
|
|
566
|
-
temperature: temperature ?? this.config.temperature ?? 0,
|
|
567
|
-
maxRetries: this.config.maxRetries ?? 0
|
|
568
|
-
});
|
|
569
|
-
return {
|
|
570
|
-
text,
|
|
571
|
-
usage: {
|
|
572
|
-
inputTokens: usage.inputTokens || 0,
|
|
573
|
-
outputTokens: usage.outputTokens || 0
|
|
574
|
-
},
|
|
575
|
-
latencyMs: Date.now() - startTime
|
|
576
|
-
};
|
|
577
|
-
}
|
|
578
|
-
/**
|
|
579
|
-
* Get the configured language model.
|
|
580
|
-
* Uses dynamic imports so consumers only need to install the provider packages they use.
|
|
581
|
-
*/
|
|
582
|
-
async getModel(requestModel) {
|
|
583
|
-
const modelId = requestModel || this.config.model || this.getDefaultModel();
|
|
584
|
-
const apiKey = this.config.apiKey;
|
|
585
|
-
switch (this.config.type) {
|
|
586
|
-
case "openai": {
|
|
587
|
-
const { createOpenAI } = await import('@ai-sdk/openai').catch(() => {
|
|
588
|
-
throw new Error(
|
|
589
|
-
"To use the OpenAI provider, install its adapter: npm install @ai-sdk/openai"
|
|
590
|
-
);
|
|
591
|
-
});
|
|
592
|
-
return createOpenAI(apiKey ? { apiKey } : {})(modelId);
|
|
593
|
-
}
|
|
594
|
-
case "anthropic": {
|
|
595
|
-
const { createAnthropic } = await import('@ai-sdk/anthropic').catch(() => {
|
|
596
|
-
throw new Error(
|
|
597
|
-
"To use the Anthropic provider, install its adapter: npm install @ai-sdk/anthropic"
|
|
598
|
-
);
|
|
599
|
-
});
|
|
600
|
-
return createAnthropic(apiKey ? { apiKey } : {})(modelId);
|
|
601
|
-
}
|
|
602
|
-
case "google": {
|
|
603
|
-
const { createGoogleGenerativeAI } = await import('@ai-sdk/google').catch(() => {
|
|
604
|
-
throw new Error(
|
|
605
|
-
"To use the Google provider, install its adapter: npm install @ai-sdk/google"
|
|
606
|
-
);
|
|
607
|
-
});
|
|
608
|
-
return createGoogleGenerativeAI(apiKey ? { apiKey } : {})(modelId);
|
|
609
|
-
}
|
|
610
|
-
default:
|
|
611
|
-
throw new Error(`Unsupported provider type: ${this.config.type}`);
|
|
612
|
-
}
|
|
613
|
-
}
|
|
614
|
-
/**
|
|
615
|
-
* Get default model for the configured provider
|
|
616
|
-
*/
|
|
617
|
-
getDefaultModel() {
|
|
618
|
-
const providerType = this.config.type;
|
|
619
|
-
if (providerType === "custom") {
|
|
620
|
-
throw new Error("Cannot get default model for custom provider type");
|
|
621
|
-
}
|
|
622
|
-
return DEFAULT_MODELS[providerType];
|
|
623
|
-
}
|
|
624
|
-
};
|
|
625
|
-
function createProvider(config) {
|
|
626
|
-
if (config.type === "custom" && config.customProvider) {
|
|
627
|
-
return config.customProvider;
|
|
628
|
-
}
|
|
629
|
-
return new VercelAIProvider(config);
|
|
630
|
-
}
|
|
631
729
|
var VocabularyComplexitySchema = z.object({
|
|
632
730
|
tier_2_words: z.string().describe("List of Tier 2 words (academic words)"),
|
|
633
731
|
tier_3_words: z.string().describe("List of Tier 3 words (domain-specific)"),
|
|
@@ -821,6 +919,44 @@ function featuresToJSON(features, decimals = 1, castToInt = true) {
|
|
|
821
919
|
}
|
|
822
920
|
return JSON.stringify(payload, null, 2);
|
|
823
921
|
}
|
|
922
|
+
/**
 * Adapters for the libraries a preprocessing step may invoke, keyed by
 * library name. Each adapter exposes `call(fnName, text)`.
 */
var LIBRARY_ADAPTERS = {
  "text-readability": {
    /**
     * Call the text-readability function `fnName` with `text`.
     * @throws {Error} If `fnName` does not name a function of the library.
     */
    call(fnName, text) {
      // Names inherited from Object.prototype ("constructor", "toString", ...)
      // resolve to functions on every object, so they are rejected up front
      // instead of being mistaken for a library function.
      const fn = Object.hasOwn(Object.prototype, fnName) ? void 0 : textReadability[fnName];
      if (typeof fn !== "function") {
        throw new Error(`Function "${fnName}" not found in text-readability.`);
      }
      return fn.call(textReadability, text);
    }
  }
};
/**
 * Post-processing transforms keyed by `post_transform.type`. Each receives
 * the step result and the whole `post_transform` object as its options.
 */
var POST_TRANSFORMS = {
  /** Round `value` to `precision` decimal places (default 0). */
  round(value, { precision = 0 }) {
    const factor = 10 ** precision;
    return Math.round(value * factor) / factor;
  }
};
/**
 * Run one preprocessing step against `text`.
 *
 * @param text - Input passed to the library function.
 * @param impl - Step description: `library`, `function`, and optionally
 *   `post_transform` (`{ type, ...options }`).
 * @returns The library result, post-transformed when `impl.post_transform` is set.
 * @throws {Error} If the library or the post_transform type is not registered.
 */
function runPreprocessingStep(text, impl) {
  // Own-property lookups only: a plain `obj[key]` would also resolve
  // inherited names such as "constructor" or "__proto__", which bypass the
  // "unsupported" checks and fail later in confusing ways.
  const adapter = Object.hasOwn(LIBRARY_ADAPTERS, impl.library) ? LIBRARY_ADAPTERS[impl.library] : void 0;
  if (!adapter) {
    const supported = Object.keys(LIBRARY_ADAPTERS).join(", ");
    throw new Error(
      `Unsupported preprocessing library "${impl.library}". Supported: ${supported}.`
    );
  }
  let result = adapter.call(impl.function, text);
  if (impl.post_transform) {
    const transformType = impl.post_transform.type;
    const transform = Object.hasOwn(POST_TRANSFORMS, transformType) ? POST_TRANSFORMS[transformType] : void 0;
    if (!transform) {
      const supported = Object.keys(POST_TRANSFORMS).join(", ");
      throw new Error(
        `Unsupported post_transform type "${transformType}". Supported: ${supported}.`
      );
    }
    result = transform(result, impl.post_transform);
  }
  return result;
}
|
|
824
960
|
|
|
825
961
|
// ../../evals/prompts/vocabulary/background-knowledge.txt
|
|
826
962
|
var background_knowledge_default = `
|
|
@@ -1126,32 +1262,28 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1126
1262
|
name: "Vocabulary",
|
|
1127
1263
|
description: "Evaluates vocabulary complexity of educational texts relative to grade level",
|
|
1128
1264
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
1129
|
-
|
|
1130
|
-
requiresOpenAIKey: true
|
|
1265
|
+
defaultProviders: ["google" /* Google */, "openai" /* OpenAI */]
|
|
1131
1266
|
};
|
|
1132
1267
|
grades34ComplexityProvider;
|
|
1133
1268
|
otherGradesComplexityProvider;
|
|
1134
1269
|
backgroundKnowledgeProvider;
|
|
1135
1270
|
constructor(config) {
|
|
1136
1271
|
super(config);
|
|
1137
|
-
this.grades34ComplexityProvider =
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
apiKey: config.openaiApiKey,
|
|
1153
|
-
maxRetries: this.config.maxRetries
|
|
1154
|
-
});
|
|
1272
|
+
this.grades34ComplexityProvider = this.createConfiguredProvider(
|
|
1273
|
+
"google" /* Google */,
|
|
1274
|
+
"gemini-2.5-pro",
|
|
1275
|
+
config.googleApiKey
|
|
1276
|
+
);
|
|
1277
|
+
this.otherGradesComplexityProvider = this.createConfiguredProvider(
|
|
1278
|
+
"openai" /* OpenAI */,
|
|
1279
|
+
"gpt-4.1-2025-04-14",
|
|
1280
|
+
config.openaiApiKey
|
|
1281
|
+
);
|
|
1282
|
+
this.backgroundKnowledgeProvider = this.createConfiguredProvider(
|
|
1283
|
+
"openai" /* OpenAI */,
|
|
1284
|
+
"gpt-4o-2024-11-20",
|
|
1285
|
+
config.openaiApiKey
|
|
1286
|
+
);
|
|
1155
1287
|
}
|
|
1156
1288
|
/**
|
|
1157
1289
|
* Evaluate vocabulary complexity for a given text and grade level
|
|
@@ -1160,6 +1292,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1160
1292
|
* @param grade - The target grade level (3-12)
|
|
1161
1293
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
1162
1294
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
1295
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1163
1296
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1164
1297
|
*/
|
|
1165
1298
|
async evaluate(text, grade) {
|
|
@@ -1171,7 +1304,9 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1171
1304
|
});
|
|
1172
1305
|
const startTime = Date.now();
|
|
1173
1306
|
const stageDetails = [];
|
|
1174
|
-
const
|
|
1307
|
+
const complexityProviderLabel = grade === "3" || grade === "4" ? this.grades34ComplexityProvider.label : this.otherGradesComplexityProvider.label;
|
|
1308
|
+
const backgroundProviderLabel = this.backgroundKnowledgeProvider.label;
|
|
1309
|
+
const modelLabel = this.config.modelOverride ? backgroundProviderLabel : `${backgroundProviderLabel}+${complexityProviderLabel}`;
|
|
1175
1310
|
try {
|
|
1176
1311
|
this.validateText(text);
|
|
1177
1312
|
this.validateGrade(grade, new Set(_VocabularyEvaluator.metadata.supportedGrades));
|
|
@@ -1182,7 +1317,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1182
1317
|
const bgResponse = await this.getBackgroundKnowledgeAssumption(text, grade);
|
|
1183
1318
|
stageDetails.push({
|
|
1184
1319
|
stage: "background_knowledge",
|
|
1185
|
-
provider:
|
|
1320
|
+
provider: backgroundProviderLabel,
|
|
1186
1321
|
latency_ms: bgResponse.latencyMs,
|
|
1187
1322
|
token_usage: {
|
|
1188
1323
|
input_tokens: bgResponse.usage.inputTokens,
|
|
@@ -1198,7 +1333,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1198
1333
|
);
|
|
1199
1334
|
stageDetails.push({
|
|
1200
1335
|
stage: "complexity_evaluation",
|
|
1201
|
-
provider:
|
|
1336
|
+
provider: complexityProviderLabel,
|
|
1202
1337
|
latency_ms: complexityResponse.latencyMs,
|
|
1203
1338
|
token_usage: {
|
|
1204
1339
|
input_tokens: complexityResponse.usage.inputTokens,
|
|
@@ -1214,8 +1349,10 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1214
1349
|
score: complexityResponse.data.complexity_score,
|
|
1215
1350
|
reasoning: complexityResponse.data.reasoning,
|
|
1216
1351
|
metadata: {
|
|
1217
|
-
model:
|
|
1218
|
-
processingTimeMs: latencyMs
|
|
1352
|
+
model: modelLabel,
|
|
1353
|
+
processingTimeMs: latencyMs,
|
|
1354
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
1355
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
1219
1356
|
},
|
|
1220
1357
|
_internal: complexityResponse.data
|
|
1221
1358
|
};
|
|
@@ -1224,7 +1361,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1224
1361
|
latencyMs,
|
|
1225
1362
|
textLength: text.length,
|
|
1226
1363
|
grade,
|
|
1227
|
-
provider:
|
|
1364
|
+
provider: modelLabel,
|
|
1228
1365
|
tokenUsage: totalTokenUsage,
|
|
1229
1366
|
metadata: {
|
|
1230
1367
|
stage_details: stageDetails
|
|
@@ -1259,7 +1396,7 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1259
1396
|
latencyMs,
|
|
1260
1397
|
textLength: text.length,
|
|
1261
1398
|
grade,
|
|
1262
|
-
provider:
|
|
1399
|
+
provider: modelLabel,
|
|
1263
1400
|
tokenUsage: totalTokenUsage,
|
|
1264
1401
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1265
1402
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -1430,25 +1567,12 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1430
1567
|
name: "Sentence Structure",
|
|
1431
1568
|
description: "Evaluates sentence structure complexity based on grammatical features",
|
|
1432
1569
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
1433
|
-
|
|
1434
|
-
requiresOpenAIKey: true
|
|
1570
|
+
defaultProviders: ["openai" /* OpenAI */]
|
|
1435
1571
|
};
|
|
1436
|
-
|
|
1437
|
-
complexityProvider;
|
|
1572
|
+
provider;
|
|
1438
1573
|
constructor(config) {
|
|
1439
1574
|
super(config);
|
|
1440
|
-
this.
|
|
1441
|
-
type: "openai",
|
|
1442
|
-
model: "gpt-4o",
|
|
1443
|
-
apiKey: config.openaiApiKey,
|
|
1444
|
-
maxRetries: this.config.maxRetries
|
|
1445
|
-
});
|
|
1446
|
-
this.complexityProvider = createProvider({
|
|
1447
|
-
type: "openai",
|
|
1448
|
-
model: "gpt-4o",
|
|
1449
|
-
apiKey: config.openaiApiKey,
|
|
1450
|
-
maxRetries: this.config.maxRetries
|
|
1451
|
-
});
|
|
1575
|
+
this.provider = this.createConfiguredProvider("openai" /* OpenAI */, "gpt-4o", config.openaiApiKey);
|
|
1452
1576
|
}
|
|
1453
1577
|
/**
|
|
1454
1578
|
* Evaluate sentence structure complexity for a given text and grade level
|
|
@@ -1457,6 +1581,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1457
1581
|
* @param grade - The target grade level (3-12)
|
|
1458
1582
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
1459
1583
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
1584
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1460
1585
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1461
1586
|
*/
|
|
1462
1587
|
async evaluate(text, grade) {
|
|
@@ -1478,7 +1603,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1478
1603
|
const analysisResponse = await this.analyzeSentenceStructure(text);
|
|
1479
1604
|
stageDetails.push({
|
|
1480
1605
|
stage: "sentence_analysis",
|
|
1481
|
-
provider:
|
|
1606
|
+
provider: this.provider.label,
|
|
1482
1607
|
latency_ms: analysisResponse.latencyMs,
|
|
1483
1608
|
token_usage: {
|
|
1484
1609
|
input_tokens: analysisResponse.usage.inputTokens,
|
|
@@ -1493,7 +1618,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1493
1618
|
const complexityResponse = await this.classifyComplexity(features, grade, text);
|
|
1494
1619
|
stageDetails.push({
|
|
1495
1620
|
stage: "complexity_classification",
|
|
1496
|
-
provider:
|
|
1621
|
+
provider: this.provider.label,
|
|
1497
1622
|
latency_ms: complexityResponse.latencyMs,
|
|
1498
1623
|
token_usage: {
|
|
1499
1624
|
input_tokens: complexityResponse.usage.inputTokens,
|
|
@@ -1509,8 +1634,10 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1509
1634
|
score: complexityResponse.data.answer,
|
|
1510
1635
|
reasoning: complexityResponse.data.reasoning,
|
|
1511
1636
|
metadata: {
|
|
1512
|
-
model:
|
|
1513
|
-
processingTimeMs: latencyMs
|
|
1637
|
+
model: this.provider.label,
|
|
1638
|
+
processingTimeMs: latencyMs,
|
|
1639
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
1640
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
1514
1641
|
},
|
|
1515
1642
|
_internal: {
|
|
1516
1643
|
sentenceAnalysis: analysisResponse.data,
|
|
@@ -1523,7 +1650,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1523
1650
|
latencyMs,
|
|
1524
1651
|
textLength: text.length,
|
|
1525
1652
|
grade,
|
|
1526
|
-
provider:
|
|
1653
|
+
provider: this.provider.label,
|
|
1527
1654
|
tokenUsage: totalTokenUsage,
|
|
1528
1655
|
metadata: {
|
|
1529
1656
|
stage_details: stageDetails
|
|
@@ -1558,7 +1685,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1558
1685
|
latencyMs,
|
|
1559
1686
|
textLength: text.length,
|
|
1560
1687
|
grade,
|
|
1561
|
-
provider:
|
|
1688
|
+
provider: this.provider.label,
|
|
1562
1689
|
tokenUsage: totalTokenUsage,
|
|
1563
1690
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1564
1691
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -1586,7 +1713,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1586
1713
|
`flesch_kincaid_grade: ${metrics.fleschKincaidGrade}`
|
|
1587
1714
|
].join("\n");
|
|
1588
1715
|
const userPrompt = getUserPromptAnalysis(text, gtCountsStr);
|
|
1589
|
-
const response = await this.
|
|
1716
|
+
const response = await this.provider.generateStructured({
|
|
1590
1717
|
messages: [
|
|
1591
1718
|
{ role: "system", content: getSystemPromptAnalysis() },
|
|
1592
1719
|
{ role: "user", content: userPrompt }
|
|
@@ -1608,7 +1735,7 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1608
1735
|
async classifyComplexity(features, grade, excerpt) {
|
|
1609
1736
|
const featuresJSON = featuresToJSON(features, 1, true);
|
|
1610
1737
|
const userPrompt = getUserPromptComplexity(featuresJSON, grade, excerpt);
|
|
1611
|
-
const response = await this.
|
|
1738
|
+
const response = await this.provider.generateStructured({
|
|
1612
1739
|
messages: [
|
|
1613
1740
|
{ role: "system", content: getSystemPromptComplexity() },
|
|
1614
1741
|
{ role: "user", content: userPrompt }
|
|
@@ -1659,18 +1786,16 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1659
1786
|
description: "Determines appropriate grade level for text with scaffolding recommendations",
|
|
1660
1787
|
supportedGrades: [],
|
|
1661
1788
|
// No grade parameter required - evaluates what grade the text is appropriate for
|
|
1662
|
-
|
|
1663
|
-
requiresOpenAIKey: false
|
|
1789
|
+
defaultProviders: ["google" /* Google */]
|
|
1664
1790
|
};
|
|
1665
1791
|
provider;
|
|
1666
1792
|
constructor(config) {
|
|
1667
1793
|
super(config);
|
|
1668
|
-
this.provider =
|
|
1669
|
-
|
|
1670
|
-
|
|
1671
|
-
|
|
1672
|
-
|
|
1673
|
-
});
|
|
1794
|
+
this.provider = this.createConfiguredProvider(
|
|
1795
|
+
"google" /* Google */,
|
|
1796
|
+
"gemini-2.5-pro",
|
|
1797
|
+
config.googleApiKey
|
|
1798
|
+
);
|
|
1674
1799
|
}
|
|
1675
1800
|
/**
|
|
1676
1801
|
* Evaluate grade level appropriateness for a given text
|
|
@@ -1678,6 +1803,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1678
1803
|
* @param text - The text to evaluate
|
|
1679
1804
|
* @returns Evaluation result with grade recommendations and scaffolding suggestions
|
|
1680
1805
|
* @throws {ValidationError} If text is empty or too short/long
|
|
1806
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1681
1807
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1682
1808
|
*/
|
|
1683
1809
|
async evaluate(text) {
|
|
@@ -1711,8 +1837,10 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1711
1837
|
score: response.data.grade,
|
|
1712
1838
|
reasoning: response.data.reasoning,
|
|
1713
1839
|
metadata: {
|
|
1714
|
-
model:
|
|
1715
|
-
processingTimeMs: latencyMs
|
|
1840
|
+
model: this.provider.label,
|
|
1841
|
+
processingTimeMs: latencyMs,
|
|
1842
|
+
inputTokens: tokenUsage.input_tokens,
|
|
1843
|
+
outputTokens: tokenUsage.output_tokens
|
|
1716
1844
|
},
|
|
1717
1845
|
_internal: response.data
|
|
1718
1846
|
};
|
|
@@ -1720,7 +1848,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1720
1848
|
status: "success",
|
|
1721
1849
|
latencyMs,
|
|
1722
1850
|
textLength: text.length,
|
|
1723
|
-
provider:
|
|
1851
|
+
provider: this.provider.label,
|
|
1724
1852
|
tokenUsage,
|
|
1725
1853
|
// No metadata.stage_details for single-stage evaluator
|
|
1726
1854
|
inputText: text
|
|
@@ -1745,7 +1873,7 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1745
1873
|
status: "error",
|
|
1746
1874
|
latencyMs,
|
|
1747
1875
|
textLength: text.length,
|
|
1748
|
-
provider:
|
|
1876
|
+
provider: this.provider.label,
|
|
1749
1877
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1750
1878
|
inputText: text
|
|
1751
1879
|
}).catch(() => {
|
|
@@ -1860,18 +1988,16 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1860
1988
|
name: "Subject Matter Knowledge",
|
|
1861
1989
|
description: "Evaluates background knowledge demands of educational texts relative to grade level",
|
|
1862
1990
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
1863
|
-
|
|
1864
|
-
requiresOpenAIKey: false
|
|
1991
|
+
defaultProviders: ["google" /* Google */]
|
|
1865
1992
|
};
|
|
1866
1993
|
provider;
|
|
1867
1994
|
constructor(config) {
|
|
1868
1995
|
super(config);
|
|
1869
|
-
this.provider =
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
});
|
|
1996
|
+
this.provider = this.createConfiguredProvider(
|
|
1997
|
+
"google" /* Google */,
|
|
1998
|
+
"gemini-3-flash-preview",
|
|
1999
|
+
config.googleApiKey
|
|
2000
|
+
);
|
|
1875
2001
|
}
|
|
1876
2002
|
/**
|
|
1877
2003
|
* Evaluate subject matter knowledge complexity for a given text and grade level
|
|
@@ -1880,6 +2006,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1880
2006
|
* @param grade - The target grade level (3-12)
|
|
1881
2007
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
1882
2008
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
2009
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
1883
2010
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
1884
2011
|
*/
|
|
1885
2012
|
async evaluate(text, grade) {
|
|
@@ -1902,7 +2029,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1902
2029
|
const response = await this.evaluateSmk(text, grade, fkScore);
|
|
1903
2030
|
stageDetails.push({
|
|
1904
2031
|
stage: "smk_evaluation",
|
|
1905
|
-
provider:
|
|
2032
|
+
provider: this.provider.label,
|
|
1906
2033
|
latency_ms: response.latencyMs,
|
|
1907
2034
|
token_usage: {
|
|
1908
2035
|
input_tokens: response.usage.inputTokens,
|
|
@@ -1918,8 +2045,10 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1918
2045
|
score: response.data.complexity_score,
|
|
1919
2046
|
reasoning: response.data.reasoning,
|
|
1920
2047
|
metadata: {
|
|
1921
|
-
model:
|
|
1922
|
-
processingTimeMs: latencyMs
|
|
2048
|
+
model: this.provider.label,
|
|
2049
|
+
processingTimeMs: latencyMs,
|
|
2050
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
2051
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
1923
2052
|
},
|
|
1924
2053
|
_internal: response.data
|
|
1925
2054
|
};
|
|
@@ -1928,7 +2057,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1928
2057
|
latencyMs,
|
|
1929
2058
|
textLength: text.length,
|
|
1930
2059
|
grade,
|
|
1931
|
-
provider:
|
|
2060
|
+
provider: this.provider.label,
|
|
1932
2061
|
tokenUsage: totalTokenUsage,
|
|
1933
2062
|
metadata: {
|
|
1934
2063
|
stage_details: stageDetails
|
|
@@ -1963,7 +2092,7 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
1963
2092
|
latencyMs,
|
|
1964
2093
|
textLength: text.length,
|
|
1965
2094
|
grade,
|
|
1966
|
-
provider:
|
|
2095
|
+
provider: this.provider.label,
|
|
1967
2096
|
tokenUsage: totalTokenUsage,
|
|
1968
2097
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
1969
2098
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -2071,18 +2200,16 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2071
2200
|
name: "Conventionality",
|
|
2072
2201
|
description: "Evaluates how explicit, literal, and straightforward a text's meaning is relative to grade level",
|
|
2073
2202
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
2074
|
-
|
|
2075
|
-
requiresOpenAIKey: false
|
|
2203
|
+
defaultProviders: ["google" /* Google */]
|
|
2076
2204
|
};
|
|
2077
2205
|
provider;
|
|
2078
2206
|
constructor(config) {
|
|
2079
2207
|
super(config);
|
|
2080
|
-
this.provider =
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2085
|
-
});
|
|
2208
|
+
this.provider = this.createConfiguredProvider(
|
|
2209
|
+
"google" /* Google */,
|
|
2210
|
+
"gemini-3-flash-preview",
|
|
2211
|
+
config.googleApiKey
|
|
2212
|
+
);
|
|
2086
2213
|
}
|
|
2087
2214
|
/**
|
|
2088
2215
|
* Evaluate conventionality complexity for a given text and grade level
|
|
@@ -2091,6 +2218,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2091
2218
|
* @param grade - The target grade level (3-12)
|
|
2092
2219
|
* @returns Evaluation result with complexity score and detailed analysis
|
|
2093
2220
|
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
2221
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
2094
2222
|
* @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
|
|
2095
2223
|
*/
|
|
2096
2224
|
async evaluate(text, grade) {
|
|
@@ -2113,7 +2241,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2113
2241
|
const response = await this.evaluateConventionality(text, grade, fkScore);
|
|
2114
2242
|
stageDetails.push({
|
|
2115
2243
|
stage: "conventionality_evaluation",
|
|
2116
|
-
provider:
|
|
2244
|
+
provider: this.provider.label,
|
|
2117
2245
|
latency_ms: response.latencyMs,
|
|
2118
2246
|
token_usage: {
|
|
2119
2247
|
input_tokens: response.usage.inputTokens,
|
|
@@ -2129,8 +2257,10 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2129
2257
|
score: response.data.complexity_score,
|
|
2130
2258
|
reasoning: response.data.reasoning,
|
|
2131
2259
|
metadata: {
|
|
2132
|
-
model:
|
|
2133
|
-
processingTimeMs: latencyMs
|
|
2260
|
+
model: this.provider.label,
|
|
2261
|
+
processingTimeMs: latencyMs,
|
|
2262
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
2263
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
2134
2264
|
},
|
|
2135
2265
|
_internal: response.data
|
|
2136
2266
|
};
|
|
@@ -2139,7 +2269,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2139
2269
|
latencyMs,
|
|
2140
2270
|
textLength: text.length,
|
|
2141
2271
|
grade,
|
|
2142
|
-
provider:
|
|
2272
|
+
provider: this.provider.label,
|
|
2143
2273
|
tokenUsage: totalTokenUsage,
|
|
2144
2274
|
metadata: {
|
|
2145
2275
|
stage_details: stageDetails
|
|
@@ -2174,7 +2304,7 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2174
2304
|
latencyMs,
|
|
2175
2305
|
textLength: text.length,
|
|
2176
2306
|
grade,
|
|
2177
|
-
provider:
|
|
2307
|
+
provider: this.provider.label,
|
|
2178
2308
|
tokenUsage: totalTokenUsage,
|
|
2179
2309
|
errorCode: error instanceof Error ? error.name : "UnknownError",
|
|
2180
2310
|
metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
|
|
@@ -2216,8 +2346,7 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
|
|
|
2216
2346
|
name: "Text Complexity",
|
|
2217
2347
|
description: "Composite evaluator analyzing vocabulary, sentence structure, subject matter knowledge, and conventionality complexity",
|
|
2218
2348
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
2219
|
-
|
|
2220
|
-
requiresOpenAIKey: true
|
|
2349
|
+
defaultProviders: ["google" /* Google */, "openai" /* OpenAI */]
|
|
2221
2350
|
};
|
|
2222
2351
|
vocabularyEvaluator;
|
|
2223
2352
|
sentenceStructureEvaluator;
|
|
@@ -2242,7 +2371,8 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
|
|
|
2242
2371
|
* @param text - The text to evaluate
|
|
2243
2372
|
* @param grade - The target grade level (3-12)
|
|
2244
2373
|
* @returns Map of sub-evaluator results
|
|
2245
|
-
* @throws {ValidationError} If text is empty or grade is invalid
|
|
2374
|
+
* @throws {ValidationError} If text is empty, too short/long, or grade is invalid
|
|
2375
|
+
* @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
|
|
2246
2376
|
* @throws {Error} If all sub-evaluators fail
|
|
2247
2377
|
*/
|
|
2248
2378
|
async evaluate(text, grade) {
|
|
@@ -2289,7 +2419,7 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
|
|
|
2289
2419
|
latencyMs,
|
|
2290
2420
|
textLength: text.length,
|
|
2291
2421
|
grade,
|
|
2292
|
-
provider: "composite:google+openai",
|
|
2422
|
+
provider: this.config.modelOverride ? `${this.config.modelOverride.provider}:${this.config.modelOverride.model}` : "composite:google+openai",
|
|
2293
2423
|
errorCode: hasFailures ? "PartialFailure" : void 0,
|
|
2294
2424
|
inputText: text
|
|
2295
2425
|
}).catch(() => {
|
|
@@ -2320,6 +2450,280 @@ async function evaluateTextComplexity(text, grade, config) {
|
|
|
2320
2450
|
return evaluator.evaluate(text, grade);
|
|
2321
2451
|
}
|
|
2322
2452
|
|
|
2323
|
-
|
|
2453
|
+
// ../../evals/prompts/purpose/system.txt
|
|
2454
|
+
var system_default4 = '\n Role\n You are an expert reading assessment evaluator. Your task is to determine the Text Complexity of a given passage based exclusively on the Purpose dimension of the qualitative measures rubric.\n\n Task Details\n You will be provided with an informational or literary `text`, along with its `grade_level` and `fk_score` (Flesch-Kincaid). You must analyze the text and determine how difficult it is for a reader to identify the author\'s purpose. \n\n Crucially, you must distinguish between the text\'s *topic* (what it is about) and its *purpose* (why the author wrote it). \n\n Rubric: Purpose Complexity\n Exceedingly Complex: Subtle and intricate, difficult to determine; includes many theoretical or abstract elements.\n Very Complex: Implicit or subtle but fairly easy to infer; more theoretical or abstract than concrete.\n Moderately Complex: Implied but easy to identify based upon context or source.\n Slightly Complex: Explicitly stated, clear, concrete, narrowly focused.\n More Context Needed: The text is a fragment or lacks necessary introductory context, making the true purpose impossible to determine accurately without external background knowledge.\n\n Expert Rules for Evaluating Purpose\n Based on expert consensus and historical grading corrections, you must apply the following heuristics:\n\n 1. The "Slightly Complex" Benchmark (Straightforward and Explicit)\n A text is Slightly Complex if its purpose is explicitly stated or if its informative intent is straightforward, clear, concrete, and directly answers what the text is immediately about. If the text opens by clearly identifying a concrete topic (e.g., "Pins are made of either brass or iron wire") and rigidly follows through by explaining factual, practical information or a process (like manufacturing steps or geographic facts), the purpose is considered explicit and straightforward. It does *not* require a literal statement like "The purpose of this text is to..." 
as long as the delivery of information is direct, clear, and unadorned by persuasive elements or complex framing.\n\n 2. Moderately Complex via Guiding Questions & Inquiry Formats\n If a text begins with a general introduction and uses guiding questions (e.g., "Have you ever wondered how clouds are formed?") to transition into an explanation, the purpose is implied rather than explicitly stated upfront. Because the reader must recognize the question as the pivot point for the author\'s intent, it is Moderately Complex.\n\n 3. Moderately Complex via Multiple Distinct Informational Goals\n If a text covers a broad topic but jumps between several distinct scientific or informational objectives without an overarching framing device or explicit thesis (e.g., talking about measuring ice sheets, then mapping, then finding meteorites), the reader must synthesize these diverse facts to recognize the broader purpose, making it Moderately Complex.\n\n 4. Moderately Complex via Arguments Disguised as Information\n If an author is arguing a specific point, correcting a misconception, or defending a stance, but the text could initially be mistaken by students as purely informative factual text, it is Moderately Complex. The reader must infer the persuasive intent or argumentative purpose beneath the informative tone.\n\n 5. "More Context Needed" for Fragments\n If a text is a fragment missing a crucial introduction or context, and identifying the author\'s purpose beyond a simple surface-level description would be exceptionally difficult for a reader in the target grade level without that external background, score it as `more_context_needed`. 
\n\n Output Format\n Provide your evaluation in the following structure:\n reasoning:\n - Surface Analysis: Identify if the text clearly identifies its topic and delivers straightforward facts, or if it utilizes structural cues, titles, or direct thesis statements.\n - Subtlety & Framing: Is the informative purpose straightforward and concrete? Does it use guiding questions? Is it an argument disguised as pure information? Are there multiple distinct informational goals requiring synthesis?\n - Context Check: Is this text a fragment missing crucial context that obscures the deeper purpose for the target grade level?\n - Rubric Alignment: Explain how the text aligns with the specific language of the rubric, explicitly referencing the expert rules above. Justify why it isn\'t one level simpler or more complex.\n\n answer:\n - complexity_score: (slightly_complex, moderately_complex, very_complex, exceedingly_complex, more_context_needed)\n - reasoning: A brief summary of your final decision.\n - details: Structured breakdown of PurposeDetails including detailed_summary, adjustment_and_scaffolding, and recommended_use_cases.\n';
|
|
2455
|
+
|
|
2456
|
+
// ../../evals/prompts/purpose/user.txt
|
|
2457
|
+
// User-message template for the purpose prompt. getUserPrompt5 passes it to
// applyPlaceholders, which fills the {text}, {grade_level} and {fk_score} tokens.
var user_default4 = "Analyze:\nText: {text}\nGrade: {grade_level}\nFK Score: {fk_score}";
|
|
2458
|
+
|
|
2459
|
+
// ../../evals/prompts/purpose/config.json
|
|
2460
|
+
var config_default = {
|
|
2461
|
+
evaluator: {
|
|
2462
|
+
id: "literacy.gla.purpose",
|
|
2463
|
+
name: "Purpose Dimension Text Complexity Evaluator",
|
|
2464
|
+
description: "Evaluates the Purpose dimension of qualitative text complexity for K-12 reading assessment, producing a 5-level rubric rating with structured pedagogical detail."
|
|
2465
|
+
},
|
|
2466
|
+
preprocessing: [
|
|
2467
|
+
{
|
|
2468
|
+
id: "fk_score",
|
|
2469
|
+
kind: "flesch_kincaid_grade",
|
|
2470
|
+
description: "Compute the Flesch-Kincaid Grade Level for the input text and bind it to {fk_score} in the prompt.",
|
|
2471
|
+
input: "text",
|
|
2472
|
+
output: "fk_score",
|
|
2473
|
+
implementation: {
|
|
2474
|
+
python: {
|
|
2475
|
+
library: "textstat",
|
|
2476
|
+
function: "flesch_kincaid_grade",
|
|
2477
|
+
post_transform: {
|
|
2478
|
+
type: "round",
|
|
2479
|
+
precision: 2
|
|
2480
|
+
}
|
|
2481
|
+
},
|
|
2482
|
+
typescript: {
|
|
2483
|
+
library: "text-readability",
|
|
2484
|
+
function: "fleschKincaidGrade",
|
|
2485
|
+
post_transform: {
|
|
2486
|
+
type: "round",
|
|
2487
|
+
precision: 2
|
|
2488
|
+
}
|
|
2489
|
+
}
|
|
2490
|
+
}
|
|
2491
|
+
}
|
|
2492
|
+
],
|
|
2493
|
+
steps: [
|
|
2494
|
+
{
|
|
2495
|
+
id: "evaluate_purpose",
|
|
2496
|
+
description: "Single-call LLM step that produces the EvaluatorOutput JSON.",
|
|
2497
|
+
prompt: {
|
|
2498
|
+
type: "chat",
|
|
2499
|
+
messages: [
|
|
2500
|
+
{
|
|
2501
|
+
role: "system",
|
|
2502
|
+
source_path: "system.txt",
|
|
2503
|
+
sha256: "745b95b7d54dc845b99363c9d3360355381883c22a5f6a0f305d7349cae38a54"
|
|
2504
|
+
},
|
|
2505
|
+
{
|
|
2506
|
+
role: "user",
|
|
2507
|
+
source_path: "user.txt",
|
|
2508
|
+
sha256: "cd8e6347db1a55d104e34436f8f66e833bd6583645d4786a554aaefdd26479b2"
|
|
2509
|
+
}
|
|
2510
|
+
],
|
|
2511
|
+
placeholders: {
|
|
2512
|
+
text: {
|
|
2513
|
+
required: true,
|
|
2514
|
+
source: "input"
|
|
2515
|
+
},
|
|
2516
|
+
grade_level: {
|
|
2517
|
+
required: true,
|
|
2518
|
+
source: "input"
|
|
2519
|
+
},
|
|
2520
|
+
fk_score: {
|
|
2521
|
+
required: true,
|
|
2522
|
+
source: "preprocessing.fk_score"
|
|
2523
|
+
}
|
|
2524
|
+
}
|
|
2525
|
+
},
|
|
2526
|
+
model: {
|
|
2527
|
+
provider: "google",
|
|
2528
|
+
name: "gemini-3-flash-preview"
|
|
2529
|
+
},
|
|
2530
|
+
generation: {
|
|
2531
|
+
temperature: 0
|
|
2532
|
+
},
|
|
2533
|
+
parser: {
|
|
2534
|
+
kind: "structured_output"
|
|
2535
|
+
},
|
|
2536
|
+
output_binding: "formatted_output"
|
|
2537
|
+
}
|
|
2538
|
+
]};
|
|
2539
|
+
|
|
2540
|
+
// src/prompts/purpose/index.ts
|
|
2541
|
+
// Locate the LLM step of the purpose config. Its id is "evaluate_" followed by
// the last dot-separated segment of the evaluator id.
var STEP_ID = "evaluate_" + config_default.evaluator.id.split(".").pop();
var _step = config_default.steps.find(function matchesStepId(candidate) {
  return candidate.id === STEP_ID;
});
if (!_step) {
  throw new Error('Step "' + STEP_ID + '" not found in purpose config.json');
}
// Names of the placeholders the step's prompt declares, in declaration order.
var PLACEHOLDER_KEYS = Object.keys(_step.prompt.placeholders);
|
|
2545
|
+
/**
 * Fill `{key}` tokens in a prompt template with the matching input values.
 *
 * Only keys listed in PLACEHOLDER_KEYS and present in `inputs` are substituted;
 * every other `{...}` token is left untouched.
 *
 * The substitution is done in a single pass with a replacer function, so:
 *  - `$`-sequences in a value (`$$`, `$&`, `` $` ``, `$'`) are inserted literally
 *    instead of being interpreted as replacement patterns, and
 *  - placeholder tokens that happen to occur inside an inserted value (e.g. a
 *    passage containing the literal text "{grade_level}") are not substituted again.
 *
 * @param {string} template - Prompt template containing `{key}` tokens.
 * @param {Object} inputs - Map from placeholder name to replacement value.
 * @returns {string} The template with known placeholders replaced.
 */
function applyPlaceholders(template, inputs) {
  const activeKeys = PLACEHOLDER_KEYS.filter((key) => key in inputs);
  if (activeKeys.length === 0) {
    return template;
  }
  // Escape regex metacharacters so a key is always matched literally.
  const alternation = activeKeys
    .map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
    .join("|");
  const tokenPattern = new RegExp(`\\{(${alternation})\\}`, "g");
  return template.replace(tokenPattern, (_token, key) => String(inputs[key]));
}
|
|
2551
|
+
/**
 * Build the system prompt for the purpose evaluator.
 *
 * @param {Object} inputs - Placeholder values handed to applyPlaceholders.
 * @returns {string} The system template after placeholder substitution.
 */
function getSystemPrompt5(inputs) {
  const systemTemplate = system_default4;
  const renderedPrompt = applyPlaceholders(systemTemplate, inputs);
  return renderedPrompt;
}
|
|
2554
|
+
/**
 * Build the user prompt for the purpose evaluator.
 *
 * @param {Object} inputs - Placeholder values handed to applyPlaceholders.
 * @returns {string} The user template after placeholder substitution.
 */
function getUserPrompt5(inputs) {
  const userTemplate = user_default4;
  const renderedPrompt = applyPlaceholders(userTemplate, inputs);
  return renderedPrompt;
}
|
|
2557
|
+
|
|
2558
|
+
// ../../evals/prompts/purpose/input_schema.json
|
|
2559
|
+
// Subset of the purpose input schema kept in the bundle: the inclusive
// bounds for the grade_level input.
var input_schema_default = {
  properties: {
    grade_level: { minimum: 3, maximum: 12 }
  }
};
|
|
2566
|
+
|
|
2567
|
+
// src/evaluators/purpose.ts
|
|
2568
|
+
// Resolve the LLM step for the evaluator: id is "evaluate_" plus the last
// dot-separated segment of the evaluator id from config.json.
var STEP_ID2 = "evaluate_" + config_default.evaluator.id.split(".").pop();
var _step2 = config_default.steps.find(function matchesStepId(candidate) {
  return candidate.id === STEP_ID2;
});
if (!_step2) {
  throw new Error('Step "' + STEP_ID2 + '" not found in purpose config.json');
}
var STEP = _step2;
// Inclusive grade bounds come from the input schema.
var GRADE_MIN = input_schema_default.properties.grade_level.minimum;
var GRADE_MAX = input_schema_default.properties.grade_level.maximum;
// Every grade from GRADE_MIN to GRADE_MAX, as strings.
var SUPPORTED_GRADES = Array.from(
  { length: GRADE_MAX - GRADE_MIN + 1 },
  function toGradeLabel(_unused, offset) {
    return String(GRADE_MIN + offset);
  }
);
|
|
2575
|
+
// Maps the complexity_score values returned by the model to the label
// reported as the result score.
var COMPLEXITY_SCORE_DISPLAY = {
  slightly_complex: "Slightly complex",
  moderately_complex: "Moderately complex",
  very_complex: "Very complex",
  exceedingly_complex: "Exceedingly complex",
  more_context_needed: "More context needed"
};
|
|
2582
|
+
/**
 * Evaluator for the Purpose dimension of qualitative text complexity.
 *
 * Identity (id, name, description), the model name and the sampling
 * temperature are read from the purpose config.json; the supported grade
 * range comes from the input schema.
 */
var PurposeEvaluator = class _PurposeEvaluator extends BaseEvaluator {
  /** Static descriptor of this evaluator. */
  static metadata = {
    id: config_default.evaluator.id,
    name: config_default.evaluator.name,
    description: config_default.evaluator.description,
    supportedGrades: SUPPORTED_GRADES,
    defaultProviders: ["google" /* Google */]
  };

  /** Sampling temperature configured for the LLM step. */
  static TEMPERATURE = STEP.generation.temperature;

  /**
   * Compute the Flesch-Kincaid score for `text` by running the "fk_score"
   * preprocessing step's TypeScript implementation from config.json.
   *
   * @param {string} text - The text to score.
   * @returns {*} Whatever runPreprocessingStep returns for that step
   *   (presumably a number; confirm against runPreprocessingStep).
   * @throws {Error} If the config has no "fk_score" preprocessing step.
   */
  static computeFkScore(text) {
    const fkStep = config_default.preprocessing.find((p) => p.id === "fk_score");
    if (!fkStep) throw new Error("fk_score preprocessing step not found in purpose config.json");
    return runPreprocessingStep(text, fkStep.implementation.typescript);
  }

  /** Provider created in the constructor; used for every LLM call. */
  provider;

  /**
   * @param config - Evaluator configuration; `googleApiKey` is forwarded to the provider.
   */
  constructor(config) {
    super(config);
    this.provider = this.createConfiguredProvider(
      "google" /* Google */,
      STEP.model.name,
      config.googleApiKey
    );
  }

  /**
   * Evaluate purpose complexity for a given text and grade level
   *
   * @param text - The text to evaluate
   * @param grade - The target grade level (3-12)
   * @returns Evaluation result with complexity score and detailed analysis
   * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
   * @throws {ConfigurationError} If modelOverride specifies a model ID that the provider rejects
   * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
   */
  async evaluate(text, grade) {
    this.logger.info("Starting Purpose evaluation", {
      evaluator: _PurposeEvaluator.metadata.id,
      operation: "evaluate",
      grade,
      textLength: text.length
    });
    const startTime = Date.now();
    // Filled after each completed LLM stage so the error path can still report usage.
    const stageDetails = [];
    try {
      this.validateText(text);
      const gradeNum = this.parseAndValidateGrade(grade);
      const fkScore = _PurposeEvaluator.computeFkScore(text);
      // Placeholder values are passed to the prompt templates as strings.
      const inputs = {
        text,
        grade_level: String(gradeNum),
        fk_score: String(fkScore)
      };
      const response = await this.callLLM(inputs);
      const latencyMs = Date.now() - startTime;
      const tokenUsage = {
        input_tokens: response.usage.inputTokens,
        output_tokens: response.usage.outputTokens
      };
      stageDetails.push({
        stage: STEP.id,
        provider: this.provider.label,
        latency_ms: response.latencyMs,
        token_usage: tokenUsage
      });
      const result = {
        // Translate the model's complexity_score into its display label.
        score: COMPLEXITY_SCORE_DISPLAY[response.data.complexity_score],
        reasoning: response.data.reasoning,
        metadata: {
          model: this.provider.label,
          processingTimeMs: latencyMs,
          inputTokens: tokenUsage.input_tokens,
          outputTokens: tokenUsage.output_tokens
        },
        _internal: response.data
      };
      // Telemetry is best-effort: a rejected send must not fail the evaluation.
      this.sendTelemetry({
        status: "success",
        latencyMs,
        textLength: text.length,
        grade: String(gradeNum),
        provider: this.provider.label,
        tokenUsage,
        metadata: { stage_details: stageDetails },
        inputText: text
      }).catch(() => void 0);
      this.logger.info("Purpose evaluation completed successfully", {
        evaluator: _PurposeEvaluator.metadata.id,
        operation: "evaluate",
        grade: gradeNum,
        score: result.score,
        processingTimeMs: latencyMs
      });
      return result;
    } catch (error) {
      const latencyMs = Date.now() - startTime;
      this.logger.error("Purpose evaluation failed", {
        evaluator: _PurposeEvaluator.metadata.id,
        operation: "evaluate",
        grade,
        error: error instanceof Error ? error : void 0,
        processingTimeMs: latencyMs
      });
      // Report token usage only for stages that completed before the failure.
      const tokenUsage = stageDetails.length > 0 ? {
        input_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.input_tokens ?? 0), 0),
        output_tokens: stageDetails.reduce((s, d) => s + (d.token_usage?.output_tokens ?? 0), 0)
      } : void 0;
      this.sendTelemetry({
        status: "error",
        latencyMs,
        textLength: text.length,
        grade: String(grade),
        provider: this.provider.label,
        tokenUsage,
        errorCode: error instanceof Error ? error.name : "UnknownError",
        metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
        inputText: text
      }).catch(() => void 0);
      // Validation errors are rethrown unchanged; everything else goes through wrapProviderError.
      if (error instanceof ValidationError) throw error;
      throw wrapProviderError(error, "Purpose evaluation failed");
    }
  }

  /**
   * Parse a grade and check it is an integer within [GRADE_MIN, GRADE_MAX].
   *
   * @param grade - Grade as a string (surrounding whitespace is ignored) or number.
   * @returns {number} The validated grade.
   * @throws {ValidationError} If the grade is not an integer in the supported range.
   */
  parseAndValidateGrade(grade) {
    // Coerce before trimming: calling .trim() on a number, null or undefined
    // raises a TypeError, which evaluate() would pass to wrapProviderError
    // instead of surfacing as a ValidationError.
    const num = Number(String(grade).trim());
    if (!Number.isInteger(num) || num < GRADE_MIN || num > GRADE_MAX) {
      throw new ValidationError(
        `Invalid grade "${grade}". Purpose evaluator supports integer grades ${GRADE_MIN}\u2013${GRADE_MAX}.`
      );
    }
    return num;
  }

  /**
   * Run the single structured-output LLM call for this evaluator.
   *
   * @param inputs - Placeholder values for the system and user prompt templates.
   * @returns Object with the parsed `data`, token `usage` and `latencyMs` of the call.
   */
  async callLLM(inputs) {
    const response = await this.provider.generateStructured({
      messages: [
        { role: "system", content: getSystemPrompt5(inputs) },
        { role: "user", content: getUserPrompt5(inputs) }
      ],
      schema: PurposeOutputSchema,
      temperature: _PurposeEvaluator.TEMPERATURE
    });
    return { data: response.data, usage: response.usage, latencyMs: response.latencyMs };
  }
};
|
|
2723
|
+
/**
 * Convenience wrapper: construct a PurposeEvaluator from `config` and run a
 * single evaluation.
 *
 * @param text - The text to evaluate.
 * @param grade - The target grade level.
 * @param config - Configuration passed to the PurposeEvaluator constructor.
 * @returns Promise of whatever PurposeEvaluator#evaluate resolves to.
 */
async function evaluatePurpose(text, grade, config) {
  const purposeEvaluator = new PurposeEvaluator(config);
  return purposeEvaluator.evaluate(text, grade);
}
|
|
2726
|
+
|
|
2727
|
+
export { APIError, AuthenticationError, ComplexityClassificationSchema, ConfigurationError, ConventionalityEvaluator, EvaluatorError, GradeBand, GradeLevelAppropriatenessEvaluator, GradeLevelAppropriatenessSchema, LogLevel, NetworkError, Provider, Providers, PurposeEvaluator, RateLimitError, SentenceAnalysisSchema, SentenceStructureEvaluator, SmkEvaluator, TextComplexityEvaluator, TextComplexityLevel, TimeoutError, ValidationError, VocabularyEvaluator, addEngineeredFeatures, calculateFleschKincaidGrade, calculateReadabilityMetrics, evaluateConventionality, evaluateGradeLevelAppropriateness, evaluatePurpose, evaluateSentenceStructure, evaluateSmk, evaluateTextComplexity, evaluateVocabulary, featuresToJSON };
|
|
2324
2728
|
//# sourceMappingURL=index.js.map
|
|
2325
2729
|
//# sourceMappingURL=index.js.map
|