@learning-commons/evaluators 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -492,7 +492,7 @@ var BaseEvaluator = class {
492
492
  const validList = Array.from(validGrades).sort((a, b) => {
493
493
  if (a === "K") return -1;
494
494
  if (b === "K") return 1;
495
- return parseInt(a) - parseInt(b);
495
+ return parseInt(a, 10) - parseInt(b, 10);
496
496
  }).join(", ");
497
497
  throw new ValidationError(
498
498
  `Invalid grade "${grade}". Supported grades for this evaluator: ${validList}`
@@ -2007,11 +2007,222 @@ async function evaluateSmk(text, grade, config) {
2007
2007
  const evaluator = new SmkEvaluator(config);
2008
2008
  return evaluator.evaluate(text, grade);
2009
2009
  }
2010
+ var ConventionalityOutputSchema = zod.z.object({
2011
+ conventionality_features: zod.z.array(zod.z.string()).describe("The specific language features driving the complexity (e.g., literal narrative, concrete actions, sustained irony, abstract qualities) with direct quotes from the text."),
2012
+ grade_context: zod.z.string().describe("How the conventionality demands compare to general expectations for the provided target grade."),
2013
+ instructional_insights: zod.z.string().describe("Actionable pedagogical suggestions for scaffolding the conventionality features in the classroom."),
2014
+ complexity_score: TextComplexityLevel.describe("The conventionality complexity level of the text"),
2015
+ reasoning: zod.z.string().describe("A detailed explanation of the rating, citing specific features in the text and referencing the expert guardrails.")
2016
+ });
2017
+
2018
+ // ../../evals/prompts/conventionality/system.txt
2019
+ var system_default3 = `Role
2020
+ You are an expert reading teacher and text complexity evaluator. Your task is to evaluate the "Conventionality" of a text and assign it a complexity level based on a 4-point scale, carefully factoring in the target grade level.
2021
+
2022
+ Objective
2023
+ Measure how explicit, literal, and straightforward the text's meaning is, versus how abstract, ironic, figurative, or archaic it is. Focus on the hiddenness of the meaning, the use of conceptual framing, the reliance on abstract reasoning, and the familiarity of the expression for the target grade.
2024
+
2025
+ Complexity Levels
2026
+ - Slightly Complex: Explicit, literal, straightforward, easy to understand. Meaning is entirely on the surface. The language is concrete, and the meaning is clear and procedural, mostly referring to observable materials and actions. Contains no symbolic or ironic language, and conceptual interpretation is not required. Contains limited figurative language that is common and easy to comprehend at the target grade level.
2027
+ - Moderately Complex: Largely explicit and easy to understand with some occasions for more complex meaning. May contain a noticeable amount of archaic/dated phrasing, formal historical prose, vocabulary demands, background knowledge requirements, or expressions that are less familiar to the target grade level, which might make the text feel vague or slightly challenging.
2028
+ - Very Complex: Fairly complex; contains sustained abstract language, conceptual framing, rhetorical idealization, ironic comparisons, or central metaphors that drive the meaning of the text. Addresses concepts, beliefs, and abstract qualities rather than just concrete objects. The tone or underlying message requires interpretation, even if the surface message is clear.
2029
+ - Exceedingly Complex: Dense and complex; contains considerable abstract, ironic, and/or figurative language. Meaning is heavily hidden, deeply conceptual, or relies heavily on complex rhetorical devices.
2030
+
2031
+ Essential Evaluation Rules
2032
+ 1. Concrete & Procedural Texts: Texts that are highly concrete, clear, and procedural (e.g., describing observable materials, mechanical processes, or physical actions) should typically be rated "Slightly Complex."
2033
+
2034
+ 2. Grade-Level Anchoring and Vague Narratives: Always consider the target grade. A literal historical narrative that might be straightforward for older students can be "Moderately Complex" for younger students (e.g., 4th graders) if it involves less familiar expressions, older contexts (e.g., wagon loads, traveling by horseback), vocabulary demands, and background knowledge requirements that make the text feel vague or slightly demanding for that age group.
2035
+
2036
+ 3. Rhetorical Idealization and Abstract Qualities: If an entire argument or narrative is built around abstract qualities (e.g., national character, bravery, liberty) and uses repeated figurative language or personification to portray a subject in a certain idealized way, rate the text as "Very Complex." Even if the figurative language is easy to interpret, the need to interpret the rhetorical tone and sustained abstract focus elevates the complexity beyond level two.
2037
+
2038
+ 4. Common Idioms and Grade-Level Appropriateness: Do NOT elevate a text to "Moderately Complex" simply because it contains a few common idiomatic expressions. If these expressions are widely known and easy for the target grade to understand without making the text feel vague, the text remains "Slightly Complex."
2039
+
2040
+ 5. Conversational and Hypothetical Framing: Using a second-person conversational hook (e.g., "Imagine you are...") to explain a concept is a standard, literal device for engaging readers. It does not constitute complex conceptual framing.
2041
+
2042
+ 6. Sustained vs. Occasional Impact: If abstract language, figurative phrasing, irony, or conceptual framing is sustained throughout the text and central to the argument/meaning, the text is Very Complex. Reserve Moderately Complex for texts where the explicit meaning dominates but the expression, vocabulary, or archaic language provides a moderate conventionality challenge.
2043
+
2044
+ 7. Central Metaphors and Conceptual Framing: When an author uses a central metaphor to explain a concept or uses figurative phrasing to explain how things "work," this abstract reasoning drives the meaning, elevating the text to Very Complex.
2045
+
2046
+ 8. Irony and Abstract Comparisons: Texts that rely on sustained irony, especially through comparative arguments, are inherently Very Complex for younger students.
2047
+
2048
+ 9. Isolate Conventionality from Vocabulary: Do not inflate the Conventionality score just because the text uses archaic, dated, or highly academic vocabulary.
2049
+
2050
+ Input Format
2051
+ You will receive:
2052
+ - text: The passage to evaluate.
2053
+ - grade_level: The target student grade level.
2054
+ - fk_score: The Flesch-Kincaid readability score.
2055
+
2056
+ Output Format
2057
+ Provide a JSON object containing ONLY the following keys:
2058
+ - complexity_score: (String) One of the 4 scale levels exactly as formatted: 'slightly_complex', 'moderately_complex', 'very_complex', or 'exceedingly_complex'.
2059
+ - reasoning: (String) A detailed explanation of the rating, citing specific features in the text and referencing the expert guardrails (e.g., noting if the text relies on abstract qualities/rhetorical idealization, if vocabulary/background knowledge demands make a literal text vague for the grade level, or if it is strictly concrete/procedural).
2060
+ - conventionality_features: (List of Strings) The specific language features driving the complexity (e.g., literal narrative, concrete actions, less familiar expressions, sustained irony, abstract qualities, rhetorical idealization, archaic phrasing) with direct quotes from the text.
2061
+ - grade_context: (String) How the conventionality demands compare to general expectations for the provided target grade.
2062
+ - instructional_insights: (String) Actionable pedagogical suggestions for scaffolding the conventionality features in the classroom.`;
2063
+
2064
+ // ../../evals/prompts/conventionality/user.txt
2065
+ var user_default3 = "Analyze:\nText: {text}\nGrade: {grade}\nFK Score: {fk_score}";
2066
+
2067
+ // src/prompts/conventionality/index.ts
2068
+ function getSystemPrompt4() {
2069
+ return system_default3;
2070
+ }
2071
+ function getUserPrompt4(text, grade, fkScore) {
2072
+ return user_default3.replaceAll("{text}", text).replaceAll("{grade}", grade).replaceAll("{fk_score}", fkScore.toString());
2073
+ }
2074
+
2075
+ // src/evaluators/conventionality.ts
2076
+ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvaluator {
2077
+ static metadata = {
2078
+ id: "conventionality",
2079
+ name: "Conventionality",
2080
+ description: "Evaluates how explicit, literal, and straightforward a text's meaning is relative to grade level",
2081
+ supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
2082
+ requiresGoogleKey: true,
2083
+ requiresOpenAIKey: false
2084
+ };
2085
+ provider;
2086
+ constructor(config) {
2087
+ super(config);
2088
+ this.provider = createProvider({
2089
+ type: "google",
2090
+ model: "gemini-3-flash-preview",
2091
+ apiKey: config.googleApiKey,
2092
+ maxRetries: this.config.maxRetries
2093
+ });
2094
+ }
2095
+ /**
2096
+ * Evaluate conventionality complexity for a given text and grade level
2097
+ *
2098
+ * @param text - The text to evaluate
2099
+ * @param grade - The target grade level (3-12)
2100
+ * @returns Evaluation result with complexity score and detailed analysis
2101
+ * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
2102
+ * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
2103
+ */
2104
+ async evaluate(text, grade) {
2105
+ this.logger.info("Starting Conventionality evaluation", {
2106
+ evaluator: "conventionality",
2107
+ operation: "evaluate",
2108
+ grade,
2109
+ textLength: text.length
2110
+ });
2111
+ const startTime = Date.now();
2112
+ const stageDetails = [];
2113
+ try {
2114
+ this.validateText(text);
2115
+ this.validateGrade(grade, new Set(_ConventionalityEvaluator.metadata.supportedGrades));
2116
+ this.logger.debug("Evaluating conventionality complexity", {
2117
+ evaluator: "conventionality",
2118
+ operation: "conventionality_evaluation"
2119
+ });
2120
+ const fkScore = calculateFleschKincaidGrade(text);
2121
+ const response = await this.evaluateConventionality(text, grade, fkScore);
2122
+ stageDetails.push({
2123
+ stage: "conventionality_evaluation",
2124
+ provider: "google:gemini-3-flash-preview",
2125
+ latency_ms: response.latencyMs,
2126
+ token_usage: {
2127
+ input_tokens: response.usage.inputTokens,
2128
+ output_tokens: response.usage.outputTokens
2129
+ }
2130
+ });
2131
+ const latencyMs = Date.now() - startTime;
2132
+ const totalTokenUsage = {
2133
+ input_tokens: stageDetails.reduce((sum, s) => sum + (s.token_usage?.input_tokens || 0), 0),
2134
+ output_tokens: stageDetails.reduce((sum, s) => sum + (s.token_usage?.output_tokens || 0), 0)
2135
+ };
2136
+ const result = {
2137
+ score: response.data.complexity_score,
2138
+ reasoning: response.data.reasoning,
2139
+ metadata: {
2140
+ model: "google:gemini-3-flash-preview",
2141
+ processingTimeMs: latencyMs
2142
+ },
2143
+ _internal: response.data
2144
+ };
2145
+ this.sendTelemetry({
2146
+ status: "success",
2147
+ latencyMs,
2148
+ textLength: text.length,
2149
+ grade,
2150
+ provider: "google:gemini-3-flash-preview",
2151
+ tokenUsage: totalTokenUsage,
2152
+ metadata: {
2153
+ stage_details: stageDetails
2154
+ },
2155
+ inputText: text
2156
+ }).catch(() => {
2157
+ });
2158
+ this.logger.info("Conventionality evaluation completed successfully", {
2159
+ evaluator: "conventionality",
2160
+ operation: "evaluate",
2161
+ grade,
2162
+ score: result.score,
2163
+ processingTimeMs: latencyMs
2164
+ });
2165
+ return result;
2166
+ } catch (error) {
2167
+ const latencyMs = Date.now() - startTime;
2168
+ this.logger.error("Conventionality evaluation failed", {
2169
+ evaluator: "conventionality",
2170
+ operation: "evaluate",
2171
+ grade,
2172
+ error: error instanceof Error ? error : void 0,
2173
+ processingTimeMs: latencyMs,
2174
+ completedStages: stageDetails.length
2175
+ });
2176
+ const totalTokenUsage = stageDetails.length > 0 ? {
2177
+ input_tokens: stageDetails.reduce((sum, s) => sum + (s.token_usage?.input_tokens || 0), 0),
2178
+ output_tokens: stageDetails.reduce((sum, s) => sum + (s.token_usage?.output_tokens || 0), 0)
2179
+ } : void 0;
2180
+ this.sendTelemetry({
2181
+ status: "error",
2182
+ latencyMs,
2183
+ textLength: text.length,
2184
+ grade,
2185
+ provider: "google:gemini-3-flash-preview",
2186
+ tokenUsage: totalTokenUsage,
2187
+ errorCode: error instanceof Error ? error.name : "UnknownError",
2188
+ metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
2189
+ inputText: text
2190
+ }).catch(() => {
2191
+ });
2192
+ if (error instanceof ValidationError) {
2193
+ throw error;
2194
+ }
2195
+ throw wrapProviderError(error, "Conventionality evaluation failed");
2196
+ }
2197
+ }
2198
+ /**
2199
+ * Run the Conventionality evaluation LLM call
2200
+ */
2201
+ async evaluateConventionality(text, grade, fkScore) {
2202
+ const response = await this.provider.generateStructured({
2203
+ messages: [
2204
+ { role: "system", content: getSystemPrompt4() },
2205
+ { role: "user", content: getUserPrompt4(text, grade, fkScore) }
2206
+ ],
2207
+ schema: ConventionalityOutputSchema,
2208
+ temperature: 0
2209
+ });
2210
+ return {
2211
+ data: response.data,
2212
+ usage: response.usage,
2213
+ latencyMs: response.latencyMs
2214
+ };
2215
+ }
2216
+ };
2217
+ async function evaluateConventionality(text, grade, config) {
2218
+ const evaluator = new ConventionalityEvaluator(config);
2219
+ return evaluator.evaluate(text, grade);
2220
+ }
2010
2221
  var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluator {
2011
2222
  static metadata = {
2012
2223
  id: "text-complexity",
2013
2224
  name: "Text Complexity",
2014
- description: "Composite evaluator analyzing vocabulary, sentence structure, and subject matter knowledge complexity",
2225
+ description: "Composite evaluator analyzing vocabulary, sentence structure, subject matter knowledge, and conventionality complexity",
2015
2226
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
2016
2227
  requiresGoogleKey: true,
2017
2228
  requiresOpenAIKey: true
@@ -2019,12 +2230,14 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
2019
2230
  vocabularyEvaluator;
2020
2231
  sentenceStructureEvaluator;
2021
2232
  smkEvaluator;
2233
+ conventionalityEvaluator;
2022
2234
  limit;
2023
2235
  constructor(config) {
2024
2236
  super(config);
2025
2237
  this.vocabularyEvaluator = new VocabularyEvaluator(config);
2026
2238
  this.sentenceStructureEvaluator = new SentenceStructureEvaluator(config);
2027
2239
  this.smkEvaluator = new SmkEvaluator(config);
2240
+ this.conventionalityEvaluator = new ConventionalityEvaluator(config);
2028
2241
  this.limit = pLimit__default.default(3);
2029
2242
  }
2030
2243
  /**
@@ -2050,21 +2263,24 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
2050
2263
  this.validateText(text);
2051
2264
  this.validateGrade(grade, new Set(_TextComplexityEvaluator.metadata.supportedGrades));
2052
2265
  const startTime = Date.now();
2053
- const [vocabResult, sentenceResult, smkResult] = await Promise.all([
2266
+ const [vocabResult, sentenceResult, smkResult, conventionalityResult] = await Promise.all([
2054
2267
  this.limit(() => this.runSubEvaluator(this.vocabularyEvaluator, text, grade)),
2055
2268
  this.limit(() => this.runSubEvaluator(this.sentenceStructureEvaluator, text, grade)),
2056
- this.limit(() => this.runSubEvaluator(this.smkEvaluator, text, grade))
2269
+ this.limit(() => this.runSubEvaluator(this.smkEvaluator, text, grade)),
2270
+ this.limit(() => this.runSubEvaluator(this.conventionalityEvaluator, text, grade))
2057
2271
  ]);
2058
2272
  const latencyMs = Date.now() - startTime;
2059
2273
  const vocabFailed = "error" in vocabResult;
2060
2274
  const sentenceFailed = "error" in sentenceResult;
2061
2275
  const smkFailed = "error" in smkResult;
2062
- const hasFailures = vocabFailed || sentenceFailed || smkFailed;
2276
+ const conventionalityFailed = "error" in conventionalityResult;
2277
+ const hasFailures = vocabFailed || sentenceFailed || smkFailed || conventionalityFailed;
2063
2278
  if (hasFailures) {
2064
2279
  const errors = [];
2065
2280
  if (vocabFailed) errors.push(`Vocabulary: ${vocabResult.error.message}`);
2066
2281
  if (sentenceFailed) errors.push(`Sentence structure: ${sentenceResult.error.message}`);
2067
2282
  if (smkFailed) errors.push(`Subject matter knowledge: ${smkResult.error.message}`);
2283
+ if (conventionalityFailed) errors.push(`Conventionality: ${conventionalityResult.error.message}`);
2068
2284
  this.logger.error("Text complexity evaluation completed with errors", {
2069
2285
  evaluator: "text-complexity",
2070
2286
  operation: "evaluate",
@@ -2072,7 +2288,7 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
2072
2288
  errors,
2073
2289
  processingTimeMs: latencyMs
2074
2290
  });
2075
- if (vocabFailed && sentenceFailed && smkFailed) {
2291
+ if (vocabFailed && sentenceFailed && smkFailed && conventionalityFailed) {
2076
2292
  throw new Error(`Text complexity evaluation failed: ${errors.join("; ")}`);
2077
2293
  }
2078
2294
  }
@@ -2093,7 +2309,7 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
2093
2309
  processingTimeMs: latencyMs,
2094
2310
  hasFailures
2095
2311
  });
2096
- return { vocabulary: vocabResult, sentenceStructure: sentenceResult, subjectMatterKnowledge: smkResult };
2312
+ return { vocabulary: vocabResult, sentenceStructure: sentenceResult, subjectMatterKnowledge: smkResult, conventionality: conventionalityResult };
2097
2313
  }
2098
2314
  /**
2099
2315
  * Run a sub-evaluator with error handling.
@@ -2116,6 +2332,7 @@ exports.APIError = APIError;
2116
2332
  exports.AuthenticationError = AuthenticationError;
2117
2333
  exports.ComplexityClassificationSchema = ComplexityClassificationSchema;
2118
2334
  exports.ConfigurationError = ConfigurationError;
2335
+ exports.ConventionalityEvaluator = ConventionalityEvaluator;
2119
2336
  exports.EvaluatorError = EvaluatorError;
2120
2337
  exports.GradeBand = GradeBand;
2121
2338
  exports.GradeLevelAppropriatenessEvaluator = GradeLevelAppropriatenessEvaluator;
@@ -2134,6 +2351,7 @@ exports.VocabularyEvaluator = VocabularyEvaluator;
2134
2351
  exports.addEngineeredFeatures = addEngineeredFeatures;
2135
2352
  exports.calculateFleschKincaidGrade = calculateFleschKincaidGrade;
2136
2353
  exports.calculateReadabilityMetrics = calculateReadabilityMetrics;
2354
+ exports.evaluateConventionality = evaluateConventionality;
2137
2355
  exports.evaluateGradeLevelAppropriateness = evaluateGradeLevelAppropriateness;
2138
2356
  exports.evaluateSentenceStructure = evaluateSentenceStructure;
2139
2357
  exports.evaluateSmk = evaluateSmk;