@learning-commons/evaluators 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -180,9 +180,71 @@ console.log(result._internal.identified_topics); // ["hydraulics", "propulsion",
180
180
 
181
181
  ---
182
182
 
183
- ### 4. Text Complexity Evaluator
183
+ ### 4. Conventionality Evaluator
184
184
 
185
- Composite evaluator that analyzes vocabulary, sentence structure, and subject matter knowledge complexity in parallel.
185
+ Evaluates how explicit, literal, and straightforward a text's meaning is versus how abstract, ironic, figurative, or archaic it is for the target grade level. Based on the Common Core Qualitative Text Complexity Rubric.
186
+
187
+ **Supported Grades:** 3-12
188
+
189
+ **Uses:** Google Gemini 3 Flash Preview
190
+
191
+ **Constructor:**
192
+ ```typescript
193
+ const evaluator = new ConventionalityEvaluator({
194
+ googleApiKey?: string; // Required - Google API key (the field is typed as optional, but this evaluator needs it)
195
+ maxRetries?: number; // Optional - Max retry attempts (default: 2)
196
+ telemetry?: boolean | TelemetryOptions; // Optional (default: true)
197
+ logger?: Logger; // Optional - Custom logger
198
+ logLevel?: LogLevel; // Optional - Logging verbosity (default: WARN)
199
+ });
200
+ ```
201
+
202
+ **API:**
203
+ ```typescript
204
+ await evaluator.evaluate(text: string, grade: string)
205
+ ```
206
+
207
+ **Returns:**
208
+ ```typescript
209
+ {
210
+ score: 'Slightly complex' | 'Moderately complex' | 'Very complex' | 'Exceedingly complex';
211
+ reasoning: string;
212
+ metadata: {
213
+ model: string;
214
+ processingTimeMs: number;
215
+ };
216
+ _internal: {
217
+ conventionality_features: string[];
218
+ grade_context: string;
219
+ instructional_insights: string;
220
+ complexity_score: 'Slightly complex' | 'Moderately complex' | 'Very complex' | 'Exceedingly complex';
221
+ reasoning: string;
222
+ };
223
+ }
224
+ ```
225
+
226
+ **Example:**
227
+ ```typescript
228
+ import { ConventionalityEvaluator } from '@learning-commons/evaluators';
229
+
230
+ const evaluator = new ConventionalityEvaluator({
231
+ googleApiKey: process.env.GOOGLE_API_KEY,
232
+ });
233
+
234
+ const result = await evaluator.evaluate(
235
+ "The author uses sustained irony to critique societal norms throughout the passage.",
236
+ "10"
237
+ );
238
+ console.log(result.score); // "Very complex"
239
+ console.log(result.reasoning);
240
+ console.log(result._internal.conventionality_features); // ["sustained irony", ...]
241
+ ```
242
+
243
+ ---
244
+
245
+ ### 5. Text Complexity Evaluator
246
+
247
+ Composite evaluator that analyzes vocabulary, sentence structure, subject matter knowledge, and conventionality complexity concurrently (at most three sub-evaluators run at a time).
186
248
 
187
249
  **Supported Grades:** 3-12
188
250
 
@@ -211,10 +273,11 @@ await evaluator.evaluate(text: string, grade: string)
211
273
  vocabulary: EvaluationResult<TextComplexityLevel> | { error: Error };
212
274
  sentenceStructure: EvaluationResult<TextComplexityLevel> | { error: Error };
213
275
  subjectMatterKnowledge: EvaluationResult<TextComplexityLevel> | { error: Error };
276
+ conventionality: EvaluationResult<TextComplexityLevel> | { error: Error };
214
277
  }
215
278
  ```
216
279
 
217
- Each sub-evaluator result is either a full `EvaluationResult` or `{ error: Error }` if that evaluator failed. An error is only thrown if all three fail.
280
+ Each sub-evaluator result is either a full `EvaluationResult` or, if that evaluator failed, `{ error: Error }`. An error is thrown only if all four sub-evaluators fail.
218
281
 
219
282
  **Example:**
220
283
  ```typescript
@@ -236,11 +299,14 @@ if (!('error' in result.sentenceStructure)) {
236
299
  if (!('error' in result.subjectMatterKnowledge)) {
237
300
  console.log('Subject matter knowledge:', result.subjectMatterKnowledge.score);
238
301
  }
302
+ if (!('error' in result.conventionality)) {
303
+ console.log('Conventionality:', result.conventionality.score);
304
+ }
239
305
  ```
240
306
 
241
307
  ---
242
308
 
243
- ### 5. Grade Level Appropriateness Evaluator
309
+ ### 6. Grade Level Appropriateness Evaluator
244
310
 
245
311
  Determines appropriate grade level for text.
246
312
 
@@ -388,6 +454,7 @@ interface BaseEvaluatorConfig {
388
454
  - **Vocabulary**: Requires both `googleApiKey` and `openaiApiKey`
389
455
  - **Sentence Structure**: Requires `openaiApiKey` only
390
456
  - **Subject Matter Knowledge**: Requires `googleApiKey` only
457
+ - **Conventionality**: Requires `googleApiKey` only
391
458
  - **Text Complexity**: Requires both `googleApiKey` and `openaiApiKey`
392
459
  - **Grade Level Appropriateness**: Requires `googleApiKey` only
393
460
 
package/dist/index.cjs CHANGED
@@ -2007,11 +2007,222 @@ async function evaluateSmk(text, grade, config) {
2007
2007
  const evaluator = new SmkEvaluator(config);
2008
2008
  return evaluator.evaluate(text, grade);
2009
2009
  }
2010
+ var ConventionalityOutputSchema = zod.z.object({
2011
+ conventionality_features: zod.z.array(zod.z.string()).describe("The specific language features driving the complexity (e.g., literal narrative, concrete actions, sustained irony, abstract qualities) with direct quotes from the text."),
2012
+ grade_context: zod.z.string().describe("How the conventionality demands compare to general expectations for the provided target grade."),
2013
+ instructional_insights: zod.z.string().describe("Actionable pedagogical suggestions for scaffolding the conventionality features in the classroom."),
2014
+ complexity_score: TextComplexityLevel.describe("The conventionality complexity level of the text"),
2015
+ reasoning: zod.z.string().describe("A detailed explanation of the rating, citing specific features in the text and referencing the expert guardrails.")
2016
+ });
2017
+
2018
+ // ../../evals/prompts/conventionality/system.txt
2019
+ var system_default3 = `Role
2020
+ You are an expert reading teacher and text complexity evaluator. Your task is to evaluate the "Conventionality" of a text and assign it a complexity level based on a 4-point scale, carefully factoring in the target grade level.
2021
+
2022
+ Objective
2023
+ Measure how explicit, literal, and straightforward the text's meaning is, versus how abstract, ironic, figurative, or archaic it is. Focus on the hiddenness of the meaning, the use of conceptual framing, the reliance on abstract reasoning, and the familiarity of the expression for the target grade.
2024
+
2025
+ Complexity Levels
2026
+ - Slightly Complex: Explicit, literal, straightforward, easy to understand. Meaning is entirely on the surface. The language is concrete, and the meaning is clear and procedural, mostly referring to observable materials and actions. Contains no symbolic or ironic language, and conceptual interpretation is not required. Contains limited figurative language that is common and easy to comprehend at the target grade level.
2027
+ - Moderately Complex: Largely explicit and easy to understand with some occasions for more complex meaning. May contain a noticeable amount of archaic/dated phrasing, formal historical prose, vocabulary demands, background knowledge requirements, or expressions that are less familiar to the target grade level, which might make the text feel vague or slightly challenging.
2028
+ - Very Complex: Fairly complex; contains sustained abstract language, conceptual framing, rhetorical idealization, ironic comparisons, or central metaphors that drive the meaning of the text. Addresses concepts, beliefs, and abstract qualities rather than just concrete objects. The tone or underlying message requires interpretation, even if the surface message is clear.
2029
+ - Exceedingly Complex: Dense and complex; contains considerable abstract, ironic, and/or figurative language. Meaning is heavily hidden, deeply conceptual, or relies heavily on complex rhetorical devices.
2030
+
2031
+ Essential Evaluation Rules
2032
+ 1. Concrete & Procedural Texts: Texts that are highly concrete, clear, and procedural (e.g., describing observable materials, mechanical processes, or physical actions) should typically be rated "Slightly Complex."
2033
+
2034
+ 2. Grade-Level Anchoring and Vague Narratives: Always consider the target grade. A literal historical narrative that might be straightforward for older students can be "Moderately Complex" for younger students (e.g., 4th graders) if it involves less familiar expressions, older contexts (e.g., wagon loads, traveling by horseback), vocabulary demands, and background knowledge requirements that make the text feel vague or slightly demanding for that age group.
2035
+
2036
+ 3. Rhetorical Idealization and Abstract Qualities: If an entire argument or narrative is built around abstract qualities (e.g., national character, bravery, liberty) and uses repeated figurative language or personification to portray a subject in a certain idealized way, rate the text as "Very Complex." Even if the figurative language is easy to interpret, the need to interpret the rhetorical tone and sustained abstract focus elevates the complexity beyond level two.
2037
+
2038
+ 4. Common Idioms and Grade-Level Appropriateness: Do NOT elevate a text to "Moderately Complex" simply because it contains a few common idiomatic expressions. If these expressions are widely known and easy for the target grade to understand without making the text feel vague, the text remains "Slightly Complex."
2039
+
2040
+ 5. Conversational and Hypothetical Framing: Using a second-person conversational hook (e.g., "Imagine you are...") to explain a concept is a standard, literal device for engaging readers. It does not constitute complex conceptual framing.
2041
+
2042
+ 6. Sustained vs. Occasional Impact: If abstract language, figurative phrasing, irony, or conceptual framing is sustained throughout the text and central to the argument/meaning, the text is Very Complex. Reserve Moderately Complex for texts where the explicit meaning dominates but the expression, vocabulary, or archaic language provides a moderate conventionality challenge.
2043
+
2044
+ 7. Central Metaphors and Conceptual Framing: When an author uses a central metaphor to explain a concept or uses figurative phrasing to explain how things "work," this abstract reasoning drives the meaning, elevating the text to Very Complex.
2045
+
2046
+ 8. Irony and Abstract Comparisons: Texts that rely on sustained irony, especially through comparative arguments, are inherently Very Complex for younger students.
2047
+
2048
+ 9. Isolate Conventionality from Vocabulary: Do not inflate the Conventionality score just because the text uses archaic, dated, or highly academic vocabulary.
2049
+
2050
+ Input Format
2051
+ You will receive:
2052
+ - text: The passage to evaluate.
2053
+ - grade_level: The target student grade level.
2054
+ - fk_score: The Flesch-Kincaid readability score.
2055
+
2056
+ Output Format
2057
+ Provide a JSON object containing ONLY the following keys:
2058
+ - complexity_score: (String) One of the 4 scale levels exactly as formatted: 'slightly_complex', 'moderately_complex', 'very_complex', or 'exceedingly_complex'.
2059
+ - reasoning: (String) A detailed explanation of the rating, citing specific features in the text and referencing the expert guardrails (e.g., noting if the text relies on abstract qualities/rhetorical idealization, if vocabulary/background knowledge demands make a literal text vague for the grade level, or if it is strictly concrete/procedural).
2060
+ - conventionality_features: (List of Strings) The specific language features driving the complexity (e.g., literal narrative, concrete actions, less familiar expressions, sustained irony, abstract qualities, rhetorical idealization, archaic phrasing) with direct quotes from the text.
2061
+ - grade_context: (String) How the conventionality demands compare to general expectations for the provided target grade.
2062
+ - instructional_insights: (String) Actionable pedagogical suggestions for scaffolding the conventionality features in the classroom.`;
2063
+
2064
+ // ../../evals/prompts/conventionality/user.txt
2065
+ var user_default3 = "Analyze:\nText: {text}\nGrade: {grade}\nFK Score: {fk_score}";
2066
+
2067
+ // src/prompts/conventionality/index.ts
2068
+ function getSystemPrompt4() {
2069
+ return system_default3;
2070
+ }
2071
+ function getUserPrompt4(text, grade, fkScore) {
2072
+ return user_default3.replaceAll("{text}", text).replaceAll("{grade}", grade).replaceAll("{fk_score}", fkScore.toString());
2073
+ }
2074
+
2075
+ // src/evaluators/conventionality.ts
2076
+ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvaluator {
2077
+ static metadata = {
2078
+ id: "conventionality",
2079
+ name: "Conventionality",
2080
+ description: "Evaluates how explicit, literal, and straightforward a text's meaning is relative to grade level",
2081
+ supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
2082
+ requiresGoogleKey: true,
2083
+ requiresOpenAIKey: false
2084
+ };
2085
+ provider;
2086
+ constructor(config) {
2087
+ super(config);
2088
+ this.provider = createProvider({
2089
+ type: "google",
2090
+ model: "gemini-3-flash-preview",
2091
+ apiKey: config.googleApiKey,
2092
+ maxRetries: this.config.maxRetries
2093
+ });
2094
+ }
2095
+ /**
2096
+ * Evaluate conventionality complexity for a given text and grade level
2097
+ *
2098
+ * @param text - The text to evaluate
2099
+ * @param grade - The target grade level (3-12)
2100
+ * @returns Evaluation result with complexity score and detailed analysis
2101
+ * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
2102
+ * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
2103
+ */
2104
+ async evaluate(text, grade) {
2105
+ this.logger.info("Starting Conventionality evaluation", {
2106
+ evaluator: "conventionality",
2107
+ operation: "evaluate",
2108
+ grade,
2109
+ textLength: text.length
2110
+ });
2111
+ const startTime = Date.now();
2112
+ const stageDetails = [];
2113
+ try {
2114
+ this.validateText(text);
2115
+ this.validateGrade(grade, new Set(_ConventionalityEvaluator.metadata.supportedGrades));
2116
+ this.logger.debug("Evaluating conventionality complexity", {
2117
+ evaluator: "conventionality",
2118
+ operation: "conventionality_evaluation"
2119
+ });
2120
+ const fkScore = calculateFleschKincaidGrade(text);
2121
+ const response = await this.evaluateConventionality(text, grade, fkScore);
2122
+ stageDetails.push({
2123
+ stage: "conventionality_evaluation",
2124
+ provider: "google:gemini-3-flash-preview",
2125
+ latency_ms: response.latencyMs,
2126
+ token_usage: {
2127
+ input_tokens: response.usage.inputTokens,
2128
+ output_tokens: response.usage.outputTokens
2129
+ }
2130
+ });
2131
+ const latencyMs = Date.now() - startTime;
2132
+ const totalTokenUsage = {
2133
+ input_tokens: stageDetails.reduce((sum, s) => sum + (s.token_usage?.input_tokens || 0), 0),
2134
+ output_tokens: stageDetails.reduce((sum, s) => sum + (s.token_usage?.output_tokens || 0), 0)
2135
+ };
2136
+ const result = {
2137
+ score: response.data.complexity_score,
2138
+ reasoning: response.data.reasoning,
2139
+ metadata: {
2140
+ model: "google:gemini-3-flash-preview",
2141
+ processingTimeMs: latencyMs
2142
+ },
2143
+ _internal: response.data
2144
+ };
2145
+ this.sendTelemetry({
2146
+ status: "success",
2147
+ latencyMs,
2148
+ textLength: text.length,
2149
+ grade,
2150
+ provider: "google:gemini-3-flash-preview",
2151
+ tokenUsage: totalTokenUsage,
2152
+ metadata: {
2153
+ stage_details: stageDetails
2154
+ },
2155
+ inputText: text
2156
+ }).catch(() => {
2157
+ });
2158
+ this.logger.info("Conventionality evaluation completed successfully", {
2159
+ evaluator: "conventionality",
2160
+ operation: "evaluate",
2161
+ grade,
2162
+ score: result.score,
2163
+ processingTimeMs: latencyMs
2164
+ });
2165
+ return result;
2166
+ } catch (error) {
2167
+ const latencyMs = Date.now() - startTime;
2168
+ this.logger.error("Conventionality evaluation failed", {
2169
+ evaluator: "conventionality",
2170
+ operation: "evaluate",
2171
+ grade,
2172
+ error: error instanceof Error ? error : void 0,
2173
+ processingTimeMs: latencyMs,
2174
+ completedStages: stageDetails.length
2175
+ });
2176
+ const totalTokenUsage = stageDetails.length > 0 ? {
2177
+ input_tokens: stageDetails.reduce((sum, s) => sum + (s.token_usage?.input_tokens || 0), 0),
2178
+ output_tokens: stageDetails.reduce((sum, s) => sum + (s.token_usage?.output_tokens || 0), 0)
2179
+ } : void 0;
2180
+ this.sendTelemetry({
2181
+ status: "error",
2182
+ latencyMs,
2183
+ textLength: text.length,
2184
+ grade,
2185
+ provider: "google:gemini-3-flash-preview",
2186
+ tokenUsage: totalTokenUsage,
2187
+ errorCode: error instanceof Error ? error.name : "UnknownError",
2188
+ metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
2189
+ inputText: text
2190
+ }).catch(() => {
2191
+ });
2192
+ if (error instanceof ValidationError) {
2193
+ throw error;
2194
+ }
2195
+ throw wrapProviderError(error, "Conventionality evaluation failed");
2196
+ }
2197
+ }
2198
+ /**
2199
+ * Run the Conventionality evaluation LLM call
2200
+ */
2201
+ async evaluateConventionality(text, grade, fkScore) {
2202
+ const response = await this.provider.generateStructured({
2203
+ messages: [
2204
+ { role: "system", content: getSystemPrompt4() },
2205
+ { role: "user", content: getUserPrompt4(text, grade, fkScore) }
2206
+ ],
2207
+ schema: ConventionalityOutputSchema,
2208
+ temperature: 0
2209
+ });
2210
+ return {
2211
+ data: response.data,
2212
+ usage: response.usage,
2213
+ latencyMs: response.latencyMs
2214
+ };
2215
+ }
2216
+ };
2217
+ async function evaluateConventionality(text, grade, config) {
2218
+ const evaluator = new ConventionalityEvaluator(config);
2219
+ return evaluator.evaluate(text, grade);
2220
+ }
2010
2221
  var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluator {
2011
2222
  static metadata = {
2012
2223
  id: "text-complexity",
2013
2224
  name: "Text Complexity",
2014
- description: "Composite evaluator analyzing vocabulary, sentence structure, and subject matter knowledge complexity",
2225
+ description: "Composite evaluator analyzing vocabulary, sentence structure, subject matter knowledge, and conventionality complexity",
2015
2226
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
2016
2227
  requiresGoogleKey: true,
2017
2228
  requiresOpenAIKey: true
@@ -2019,12 +2230,14 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
2019
2230
  vocabularyEvaluator;
2020
2231
  sentenceStructureEvaluator;
2021
2232
  smkEvaluator;
2233
+ conventionalityEvaluator;
2022
2234
  limit;
2023
2235
  constructor(config) {
2024
2236
  super(config);
2025
2237
  this.vocabularyEvaluator = new VocabularyEvaluator(config);
2026
2238
  this.sentenceStructureEvaluator = new SentenceStructureEvaluator(config);
2027
2239
  this.smkEvaluator = new SmkEvaluator(config);
2240
+ this.conventionalityEvaluator = new ConventionalityEvaluator(config);
2028
2241
  this.limit = pLimit__default.default(3);
2029
2242
  }
2030
2243
  /**
@@ -2050,21 +2263,24 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
2050
2263
  this.validateText(text);
2051
2264
  this.validateGrade(grade, new Set(_TextComplexityEvaluator.metadata.supportedGrades));
2052
2265
  const startTime = Date.now();
2053
- const [vocabResult, sentenceResult, smkResult] = await Promise.all([
2266
+ const [vocabResult, sentenceResult, smkResult, conventionalityResult] = await Promise.all([
2054
2267
  this.limit(() => this.runSubEvaluator(this.vocabularyEvaluator, text, grade)),
2055
2268
  this.limit(() => this.runSubEvaluator(this.sentenceStructureEvaluator, text, grade)),
2056
- this.limit(() => this.runSubEvaluator(this.smkEvaluator, text, grade))
2269
+ this.limit(() => this.runSubEvaluator(this.smkEvaluator, text, grade)),
2270
+ this.limit(() => this.runSubEvaluator(this.conventionalityEvaluator, text, grade))
2057
2271
  ]);
2058
2272
  const latencyMs = Date.now() - startTime;
2059
2273
  const vocabFailed = "error" in vocabResult;
2060
2274
  const sentenceFailed = "error" in sentenceResult;
2061
2275
  const smkFailed = "error" in smkResult;
2062
- const hasFailures = vocabFailed || sentenceFailed || smkFailed;
2276
+ const conventionalityFailed = "error" in conventionalityResult;
2277
+ const hasFailures = vocabFailed || sentenceFailed || smkFailed || conventionalityFailed;
2063
2278
  if (hasFailures) {
2064
2279
  const errors = [];
2065
2280
  if (vocabFailed) errors.push(`Vocabulary: ${vocabResult.error.message}`);
2066
2281
  if (sentenceFailed) errors.push(`Sentence structure: ${sentenceResult.error.message}`);
2067
2282
  if (smkFailed) errors.push(`Subject matter knowledge: ${smkResult.error.message}`);
2283
+ if (conventionalityFailed) errors.push(`Conventionality: ${conventionalityResult.error.message}`);
2068
2284
  this.logger.error("Text complexity evaluation completed with errors", {
2069
2285
  evaluator: "text-complexity",
2070
2286
  operation: "evaluate",
@@ -2072,7 +2288,7 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
2072
2288
  errors,
2073
2289
  processingTimeMs: latencyMs
2074
2290
  });
2075
- if (vocabFailed && sentenceFailed && smkFailed) {
2291
+ if (vocabFailed && sentenceFailed && smkFailed && conventionalityFailed) {
2076
2292
  throw new Error(`Text complexity evaluation failed: ${errors.join("; ")}`);
2077
2293
  }
2078
2294
  }
@@ -2093,7 +2309,7 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
2093
2309
  processingTimeMs: latencyMs,
2094
2310
  hasFailures
2095
2311
  });
2096
- return { vocabulary: vocabResult, sentenceStructure: sentenceResult, subjectMatterKnowledge: smkResult };
2312
+ return { vocabulary: vocabResult, sentenceStructure: sentenceResult, subjectMatterKnowledge: smkResult, conventionality: conventionalityResult };
2097
2313
  }
2098
2314
  /**
2099
2315
  * Run a sub-evaluator with error handling.
@@ -2116,6 +2332,7 @@ exports.APIError = APIError;
2116
2332
  exports.AuthenticationError = AuthenticationError;
2117
2333
  exports.ComplexityClassificationSchema = ComplexityClassificationSchema;
2118
2334
  exports.ConfigurationError = ConfigurationError;
2335
+ exports.ConventionalityEvaluator = ConventionalityEvaluator;
2119
2336
  exports.EvaluatorError = EvaluatorError;
2120
2337
  exports.GradeBand = GradeBand;
2121
2338
  exports.GradeLevelAppropriatenessEvaluator = GradeLevelAppropriatenessEvaluator;
@@ -2134,6 +2351,7 @@ exports.VocabularyEvaluator = VocabularyEvaluator;
2134
2351
  exports.addEngineeredFeatures = addEngineeredFeatures;
2135
2352
  exports.calculateFleschKincaidGrade = calculateFleschKincaidGrade;
2136
2353
  exports.calculateReadabilityMetrics = calculateReadabilityMetrics;
2354
+ exports.evaluateConventionality = evaluateConventionality;
2137
2355
  exports.evaluateGradeLevelAppropriateness = evaluateGradeLevelAppropriateness;
2138
2356
  exports.evaluateSentenceStructure = evaluateSentenceStructure;
2139
2357
  exports.evaluateSmk = evaluateSmk;