@learning-commons/evaluators 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -553,6 +553,33 @@ declare const VocabularyComplexitySchema: z.ZodObject<{
553
553
  }>;
554
554
  type VocabularyInternal = z.infer<typeof VocabularyComplexitySchema>;
555
555
 
556
+ /**
557
+ * Subject Matter Knowledge evaluation output schema
558
+ */
559
+ declare const SmkOutputSchema: z.ZodObject<{
560
+ identified_topics: z.ZodArray<z.ZodString, "many">;
561
+ curriculum_check: z.ZodString;
562
+ assumptions_and_scaffolding: z.ZodString;
563
+ friction_analysis: z.ZodString;
564
+ complexity_score: z.ZodEnum<["Slightly complex", "Moderately complex", "Very complex", "Exceedingly complex"]>;
565
+ reasoning: z.ZodString;
566
+ }, "strip", z.ZodTypeAny, {
567
+ reasoning: string;
568
+ complexity_score: "Slightly complex" | "Moderately complex" | "Very complex" | "Exceedingly complex";
569
+ identified_topics: string[];
570
+ curriculum_check: string;
571
+ assumptions_and_scaffolding: string;
572
+ friction_analysis: string;
573
+ }, {
574
+ reasoning: string;
575
+ complexity_score: "Slightly complex" | "Moderately complex" | "Very complex" | "Exceedingly complex";
576
+ identified_topics: string[];
577
+ curriculum_check: string;
578
+ assumptions_and_scaffolding: string;
579
+ friction_analysis: string;
580
+ }>;
581
+ type SmkInternal = z.infer<typeof SmkOutputSchema>;
582
+
556
583
  /**
557
584
  * Evaluation status
558
585
  */
@@ -1024,6 +1051,69 @@ declare class GradeLevelAppropriatenessEvaluator extends BaseEvaluator {
1024
1051
  */
1025
1052
  declare function evaluateGradeLevelAppropriateness(text: string, config: BaseEvaluatorConfig): Promise<EvaluationResult<GradeBand, GradeLevelAppropriatenessInternal>>;
1026
1053
 
1054
+ /**
1055
+ * Subject Matter Knowledge (SMK) Evaluator
1056
+ *
1057
+ * Evaluates the background knowledge demands of educational texts relative to grade level.
1058
+ * Determines how much prior subject knowledge a student needs to comprehend the text.
1059
+ *
1060
+ * Based on the Common Core Qualitative Text Complexity Rubric with 4 levels:
1061
+ * - Slightly complex
1062
+ * - Moderately complex
1063
+ * - Very complex
1064
+ * - Exceedingly complex
1065
+ *
1066
+ * @example
1067
+ * ```typescript
1068
+ * const evaluator = new SmkEvaluator({
1069
+ * googleApiKey: process.env.GOOGLE_API_KEY
1070
+ * });
1071
+ *
1072
+ * const result = await evaluator.evaluate(text, "6");
1073
+ * console.log(result.score); // "Moderately complex"
1074
+ * console.log(result.reasoning);
1075
+ * ```
1076
+ */
1077
+ declare class SmkEvaluator extends BaseEvaluator {
1078
+ static readonly metadata: {
1079
+ id: string;
1080
+ name: string;
1081
+ description: string;
1082
+ supportedGrades: readonly ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"];
1083
+ requiresGoogleKey: boolean;
1084
+ requiresOpenAIKey: boolean;
1085
+ };
1086
+ private provider;
1087
+ constructor(config: BaseEvaluatorConfig);
1088
+ /**
1089
+ * Evaluate subject matter knowledge complexity for a given text and grade level
1090
+ *
1091
+ * @param text - The text to evaluate
1092
+ * @param grade - The target grade level (3-12)
1093
+ * @returns Evaluation result with complexity score and detailed analysis
1094
+ * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
1095
+ * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1096
+ */
1097
+ evaluate(text: string, grade: string): Promise<EvaluationResult<TextComplexityLevel, SmkInternal>>;
1098
+ /**
1099
+ * Run the SMK evaluation LLM call
1100
+ */
1101
+ private evaluateSmk;
1102
+ }
1103
+ /**
1104
+ * Functional API for SMK evaluation
1105
+ *
1106
+ * @example
1107
+ * ```typescript
1108
+ * const result = await evaluateSmk(
1109
+ * "Hydraulic propulsion works by sucking water at the bow and forcing it sternward.",
1110
+ * "10",
1111
+ * { googleApiKey: process.env.GOOGLE_API_KEY }
1112
+ * );
1113
+ * ```
1114
+ */
1115
+ declare function evaluateSmk(text: string, grade: string, config: BaseEvaluatorConfig): Promise<EvaluationResult<TextComplexityLevel, SmkInternal>>;
1116
+
1027
1117
  /**
1028
1118
  * Result map returned by TextComplexityEvaluator.
1029
1119
  * Each key holds the full evaluation result from its sub-evaluator, or an error if it failed.
@@ -1035,16 +1125,20 @@ interface TextComplexityResult {
1035
1125
  sentenceStructure: EvaluationResult<TextComplexityLevel, SentenceStructureInternal> | {
1036
1126
  error: Error;
1037
1127
  };
1128
+ subjectMatterKnowledge: EvaluationResult<TextComplexityLevel, SmkInternal> | {
1129
+ error: Error;
1130
+ };
1038
1131
  }
1039
1132
  /**
1040
1133
  * Text Complexity Evaluator
1041
1134
  *
1042
- * Composite evaluator that analyzes both vocabulary and sentence structure complexity.
1043
- * Runs both evaluations in parallel with concurrency control to avoid rate limiting.
1135
+ * Composite evaluator that analyzes vocabulary, sentence structure, and subject matter knowledge.
1136
+ * Runs all evaluations in parallel with concurrency control to avoid rate limiting.
1044
1137
  *
1045
1138
  * Uses:
1046
1139
  * - VocabularyEvaluator (Google Gemini 2.5 Pro + OpenAI GPT-4o)
1047
1140
  * - SentenceStructureEvaluator (OpenAI GPT-4o)
1141
+ * - SmkEvaluator (Google Gemini 3 Flash Preview)
1048
1142
  *
1049
1143
  * @example
1050
1144
  * ```typescript
@@ -1070,13 +1164,14 @@ declare class TextComplexityEvaluator extends BaseEvaluator {
1070
1164
  };
1071
1165
  private vocabularyEvaluator;
1072
1166
  private sentenceStructureEvaluator;
1167
+ private smkEvaluator;
1073
1168
  private limit;
1074
1169
  constructor(config: BaseEvaluatorConfig);
1075
1170
  /**
1076
1171
  * Evaluate text complexity for a given text and grade level
1077
1172
  *
1078
- * Runs vocabulary and sentence structure evaluations in parallel with concurrency control.
1079
- * If both sub-evaluators fail, throws an error. Otherwise returns a result map where
1173
+ * Runs vocabulary, sentence structure, and SMK evaluations in parallel with concurrency control.
1174
+ * If all three sub-evaluators fail, throws an error. Otherwise returns a result map where
1080
1175
  * failed sub-evaluators are represented as `{ error: Error }`.
1081
1176
  *
1082
1177
  * @param text - The text to evaluate
@@ -1139,4 +1234,4 @@ declare function addEngineeredFeatures(analysis: SentenceAnalysis): SentenceFeat
1139
1234
  */
1140
1235
  declare function featuresToJSON(features: SentenceFeatures, decimals?: number, castToInt?: boolean): string;
1141
1236
 
1142
- export { APIError, AuthenticationError, type BaseEvaluatorConfig, type ComplexityClassification, ComplexityClassificationSchema, ConfigurationError, type EvaluationError, type EvaluationMetadata, type EvaluationResult, EvaluatorError, type EvaluatorMetadata, GradeBand, GradeLevelAppropriatenessEvaluator, type GradeLevelAppropriatenessInternal, GradeLevelAppropriatenessSchema, type LLMProvider, type LLMRequest, type LLMResponse, type LogContext, LogLevel, type Logger, type Message, NetworkError, type ProviderConfig, RateLimitError, type ReadabilityMetrics, type SentenceAnalysis, SentenceAnalysisSchema, type SentenceFeatures, SentenceStructureEvaluator, type SentenceStructureInternal, type TelemetryOptions, TextComplexityEvaluator, TextComplexityLevel, type TextComplexityResult, type TextGenerationResponse, TimeoutError, ValidationError, VocabularyEvaluator, type VocabularyInternal, addEngineeredFeatures, calculateFleschKincaidGrade, calculateReadabilityMetrics, evaluateGradeLevelAppropriateness, evaluateSentenceStructure, evaluateTextComplexity, evaluateVocabulary, featuresToJSON };
1237
+ export { APIError, AuthenticationError, type BaseEvaluatorConfig, type ComplexityClassification, ComplexityClassificationSchema, ConfigurationError, type EvaluationError, type EvaluationMetadata, type EvaluationResult, EvaluatorError, type EvaluatorMetadata, GradeBand, GradeLevelAppropriatenessEvaluator, type GradeLevelAppropriatenessInternal, GradeLevelAppropriatenessSchema, type LLMProvider, type LLMRequest, type LLMResponse, type LogContext, LogLevel, type Logger, type Message, NetworkError, type ProviderConfig, RateLimitError, type ReadabilityMetrics, type SentenceAnalysis, SentenceAnalysisSchema, type SentenceFeatures, SentenceStructureEvaluator, type SentenceStructureInternal, SmkEvaluator, type SmkInternal, type TelemetryOptions, TextComplexityEvaluator, TextComplexityLevel, type TextComplexityResult, type TextGenerationResponse, TimeoutError, ValidationError, VocabularyEvaluator, type VocabularyInternal, addEngineeredFeatures, calculateFleschKincaidGrade, calculateReadabilityMetrics, evaluateGradeLevelAppropriateness, evaluateSentenceStructure, evaluateSmk, evaluateTextComplexity, evaluateVocabulary, featuresToJSON };
package/dist/index.d.ts CHANGED
@@ -553,6 +553,33 @@ declare const VocabularyComplexitySchema: z.ZodObject<{
553
553
  }>;
554
554
  type VocabularyInternal = z.infer<typeof VocabularyComplexitySchema>;
555
555
 
556
+ /**
557
+ * Subject Matter Knowledge evaluation output schema
558
+ */
559
+ declare const SmkOutputSchema: z.ZodObject<{
560
+ identified_topics: z.ZodArray<z.ZodString, "many">;
561
+ curriculum_check: z.ZodString;
562
+ assumptions_and_scaffolding: z.ZodString;
563
+ friction_analysis: z.ZodString;
564
+ complexity_score: z.ZodEnum<["Slightly complex", "Moderately complex", "Very complex", "Exceedingly complex"]>;
565
+ reasoning: z.ZodString;
566
+ }, "strip", z.ZodTypeAny, {
567
+ reasoning: string;
568
+ complexity_score: "Slightly complex" | "Moderately complex" | "Very complex" | "Exceedingly complex";
569
+ identified_topics: string[];
570
+ curriculum_check: string;
571
+ assumptions_and_scaffolding: string;
572
+ friction_analysis: string;
573
+ }, {
574
+ reasoning: string;
575
+ complexity_score: "Slightly complex" | "Moderately complex" | "Very complex" | "Exceedingly complex";
576
+ identified_topics: string[];
577
+ curriculum_check: string;
578
+ assumptions_and_scaffolding: string;
579
+ friction_analysis: string;
580
+ }>;
581
+ type SmkInternal = z.infer<typeof SmkOutputSchema>;
582
+
556
583
  /**
557
584
  * Evaluation status
558
585
  */
@@ -1024,6 +1051,69 @@ declare class GradeLevelAppropriatenessEvaluator extends BaseEvaluator {
1024
1051
  */
1025
1052
  declare function evaluateGradeLevelAppropriateness(text: string, config: BaseEvaluatorConfig): Promise<EvaluationResult<GradeBand, GradeLevelAppropriatenessInternal>>;
1026
1053
 
1054
+ /**
1055
+ * Subject Matter Knowledge (SMK) Evaluator
1056
+ *
1057
+ * Evaluates the background knowledge demands of educational texts relative to grade level.
1058
+ * Determines how much prior subject knowledge a student needs to comprehend the text.
1059
+ *
1060
+ * Based on the Common Core Qualitative Text Complexity Rubric with 4 levels:
1061
+ * - Slightly complex
1062
+ * - Moderately complex
1063
+ * - Very complex
1064
+ * - Exceedingly complex
1065
+ *
1066
+ * @example
1067
+ * ```typescript
1068
+ * const evaluator = new SmkEvaluator({
1069
+ * googleApiKey: process.env.GOOGLE_API_KEY
1070
+ * });
1071
+ *
1072
+ * const result = await evaluator.evaluate(text, "6");
1073
+ * console.log(result.score); // "Moderately complex"
1074
+ * console.log(result.reasoning);
1075
+ * ```
1076
+ */
1077
+ declare class SmkEvaluator extends BaseEvaluator {
1078
+ static readonly metadata: {
1079
+ id: string;
1080
+ name: string;
1081
+ description: string;
1082
+ supportedGrades: readonly ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"];
1083
+ requiresGoogleKey: boolean;
1084
+ requiresOpenAIKey: boolean;
1085
+ };
1086
+ private provider;
1087
+ constructor(config: BaseEvaluatorConfig);
1088
+ /**
1089
+ * Evaluate subject matter knowledge complexity for a given text and grade level
1090
+ *
1091
+ * @param text - The text to evaluate
1092
+ * @param grade - The target grade level (3-12)
1093
+ * @returns Evaluation result with complexity score and detailed analysis
1094
+ * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
1095
+ * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1096
+ */
1097
+ evaluate(text: string, grade: string): Promise<EvaluationResult<TextComplexityLevel, SmkInternal>>;
1098
+ /**
1099
+ * Run the SMK evaluation LLM call
1100
+ */
1101
+ private evaluateSmk;
1102
+ }
1103
+ /**
1104
+ * Functional API for SMK evaluation
1105
+ *
1106
+ * @example
1107
+ * ```typescript
1108
+ * const result = await evaluateSmk(
1109
+ * "Hydraulic propulsion works by sucking water at the bow and forcing it sternward.",
1110
+ * "10",
1111
+ * { googleApiKey: process.env.GOOGLE_API_KEY }
1112
+ * );
1113
+ * ```
1114
+ */
1115
+ declare function evaluateSmk(text: string, grade: string, config: BaseEvaluatorConfig): Promise<EvaluationResult<TextComplexityLevel, SmkInternal>>;
1116
+
1027
1117
  /**
1028
1118
  * Result map returned by TextComplexityEvaluator.
1029
1119
  * Each key holds the full evaluation result from its sub-evaluator, or an error if it failed.
@@ -1035,16 +1125,20 @@ interface TextComplexityResult {
1035
1125
  sentenceStructure: EvaluationResult<TextComplexityLevel, SentenceStructureInternal> | {
1036
1126
  error: Error;
1037
1127
  };
1128
+ subjectMatterKnowledge: EvaluationResult<TextComplexityLevel, SmkInternal> | {
1129
+ error: Error;
1130
+ };
1038
1131
  }
1039
1132
  /**
1040
1133
  * Text Complexity Evaluator
1041
1134
  *
1042
- * Composite evaluator that analyzes both vocabulary and sentence structure complexity.
1043
- * Runs both evaluations in parallel with concurrency control to avoid rate limiting.
1135
+ * Composite evaluator that analyzes vocabulary, sentence structure, and subject matter knowledge.
1136
+ * Runs all evaluations in parallel with concurrency control to avoid rate limiting.
1044
1137
  *
1045
1138
  * Uses:
1046
1139
  * - VocabularyEvaluator (Google Gemini 2.5 Pro + OpenAI GPT-4o)
1047
1140
  * - SentenceStructureEvaluator (OpenAI GPT-4o)
1141
+ * - SmkEvaluator (Google Gemini 3 Flash Preview)
1048
1142
  *
1049
1143
  * @example
1050
1144
  * ```typescript
@@ -1070,13 +1164,14 @@ declare class TextComplexityEvaluator extends BaseEvaluator {
1070
1164
  };
1071
1165
  private vocabularyEvaluator;
1072
1166
  private sentenceStructureEvaluator;
1167
+ private smkEvaluator;
1073
1168
  private limit;
1074
1169
  constructor(config: BaseEvaluatorConfig);
1075
1170
  /**
1076
1171
  * Evaluate text complexity for a given text and grade level
1077
1172
  *
1078
- * Runs vocabulary and sentence structure evaluations in parallel with concurrency control.
1079
- * If both sub-evaluators fail, throws an error. Otherwise returns a result map where
1173
+ * Runs vocabulary, sentence structure, and SMK evaluations in parallel with concurrency control.
1174
+ * If all three sub-evaluators fail, throws an error. Otherwise returns a result map where
1080
1175
  * failed sub-evaluators are represented as `{ error: Error }`.
1081
1176
  *
1082
1177
  * @param text - The text to evaluate
@@ -1139,4 +1234,4 @@ declare function addEngineeredFeatures(analysis: SentenceAnalysis): SentenceFeat
1139
1234
  */
1140
1235
  declare function featuresToJSON(features: SentenceFeatures, decimals?: number, castToInt?: boolean): string;
1141
1236
 
1142
- export { APIError, AuthenticationError, type BaseEvaluatorConfig, type ComplexityClassification, ComplexityClassificationSchema, ConfigurationError, type EvaluationError, type EvaluationMetadata, type EvaluationResult, EvaluatorError, type EvaluatorMetadata, GradeBand, GradeLevelAppropriatenessEvaluator, type GradeLevelAppropriatenessInternal, GradeLevelAppropriatenessSchema, type LLMProvider, type LLMRequest, type LLMResponse, type LogContext, LogLevel, type Logger, type Message, NetworkError, type ProviderConfig, RateLimitError, type ReadabilityMetrics, type SentenceAnalysis, SentenceAnalysisSchema, type SentenceFeatures, SentenceStructureEvaluator, type SentenceStructureInternal, type TelemetryOptions, TextComplexityEvaluator, TextComplexityLevel, type TextComplexityResult, type TextGenerationResponse, TimeoutError, ValidationError, VocabularyEvaluator, type VocabularyInternal, addEngineeredFeatures, calculateFleschKincaidGrade, calculateReadabilityMetrics, evaluateGradeLevelAppropriateness, evaluateSentenceStructure, evaluateTextComplexity, evaluateVocabulary, featuresToJSON };
1237
+ export { APIError, AuthenticationError, type BaseEvaluatorConfig, type ComplexityClassification, ComplexityClassificationSchema, ConfigurationError, type EvaluationError, type EvaluationMetadata, type EvaluationResult, EvaluatorError, type EvaluatorMetadata, GradeBand, GradeLevelAppropriatenessEvaluator, type GradeLevelAppropriatenessInternal, GradeLevelAppropriatenessSchema, type LLMProvider, type LLMRequest, type LLMResponse, type LogContext, LogLevel, type Logger, type Message, NetworkError, type ProviderConfig, RateLimitError, type ReadabilityMetrics, type SentenceAnalysis, SentenceAnalysisSchema, type SentenceFeatures, SentenceStructureEvaluator, type SentenceStructureInternal, SmkEvaluator, type SmkInternal, type TelemetryOptions, TextComplexityEvaluator, TextComplexityLevel, type TextComplexityResult, type TextGenerationResponse, TimeoutError, ValidationError, VocabularyEvaluator, type VocabularyInternal, addEngineeredFeatures, calculateFleschKincaidGrade, calculateReadabilityMetrics, evaluateGradeLevelAppropriateness, evaluateSentenceStructure, evaluateSmk, evaluateTextComplexity, evaluateVocabulary, featuresToJSON };
package/dist/index.js CHANGED
@@ -1761,29 +1761,269 @@ async function evaluateGradeLevelAppropriateness(text, config) {
1761
1761
  const evaluator = new GradeLevelAppropriatenessEvaluator(config);
1762
1762
  return evaluator.evaluate(text);
1763
1763
  }
1764
+ var SmkOutputSchema = z.object({
1765
+ identified_topics: z.array(z.string()).describe("List of major subjects/concepts found in the text."),
1766
+ curriculum_check: z.string().describe("Whether the topics are standard K-8 or specialized high school level."),
1767
+ assumptions_and_scaffolding: z.string().describe("What the author assumes the reader knows vs. what is explained."),
1768
+ friction_analysis: z.string().describe("Whether difficulty comes from vocabulary/structure or actual knowledge demands."),
1769
+ complexity_score: TextComplexityLevel.describe("The subject matter knowledge complexity level of the text"),
1770
+ reasoning: z.string().describe("A brief synthesis of why the text fits the chosen complexity level.")
1771
+ });
1772
+
1773
+ // ../../evals/prompts/subject-matter-knowledge/system.txt
1774
+ var system_default2 = `
1775
+ To perform the task of evaluating text complexity based on Subject Matter Knowledge (SMK), strictly adhere to the following instructions.
1776
+ Role
1777
+ You are an expert K-12 Literacy Pedagogue and Text Complexity Evaluator. Your specific focus is analyzing Subject Matter Knowledge (SMK) demands according to the Common Core Qualitative Text Complexity Rubric.
1778
+ Objective
1779
+ Analyze a provided text relative to a target grade_level. You must determine the extent of background knowledge required to comprehend the text. You must distinguish between Common/Standard knowledge (generally lower/moderate complexity) and Specialized/Theoretical knowledge (generally higher complexity).
1780
+ Input Data
1781
+ text: The passage to analyze.
1782
+ grade_level: The target student grade (integer).
1783
+ fk_score: Flesch-Kincaid Grade Level. Note: Use this only as a loose proxy for sentence structure. Do not let a high FK score artificially inflate the Subject Matter Knowledge score if the concepts remain simple.
1784
+
1785
+ 1. The Rubric: Subject Matter Knowledge (SMK)
1786
+ 1. Slightly Complex
1787
+ Scope: Everyday, practical knowledge, and Introduction to Skills.
1788
+ Concept Type: Concrete, directly observable, and familiar.
1789
+ Key Indicator: "How-to" texts involving familiar objects (e.g., drawing a cupboard, playing a game, family life). Even if specific terms (like "scale" or "measure") are used, if the application is on a common object, it remains Slightly Complex.
1790
+ 2. Moderately Complex
1791
+ Scope: Common Discipline-Specific Knowledge or Narrative History.
1792
+ Definition: Topics widely introduced in K-8 curricula (Basic American History, Geography, Earth Science, Biology).
1793
+ Key Characteristic: The text bridges concrete descriptions with abstract themes (e.g., using farming to discuss justice), OR narrates historical events via sensory details.
1794
+ Spatial Reasoning: Texts requiring mental manipulation of maps/routes are generally Moderate, unless the object is a familiar household item (see Slightly Complex).
1795
+ 3. Very Complex
1796
+ Scope: Specialized Discipline-Specific, Engineering Mechanics, or Political Theory.
1797
+ Definition: Topics characteristic of High School (9-12) curricula requiring abstract mental models.
1798
+ Key Characteristic: Requires understanding mechanisms (how physics works/propulsion), chemical composition, or undefined political stakes (specific treaties, alliances, or secularization without context).
1799
+ 4. Exceedingly Complex
1800
+ Scope: Professional or Academic knowledge.
1801
+
1802
+ 2. The Expert Mental Model (Decision Logic)
1803
+ Use these refined rules to categorize cases.
1804
+ Rule A: The "Layers of Meaning" Check
1805
+ Concrete -> Abstract (Moderate): The text describes concrete things (farming) to argue an abstract point (justice, rights).
1806
+ Concrete -> Concrete (Slightly): The text describes concrete things (lines, paper) to achieve a concrete result (drawing a cupboard). Do not over-rank practical instructions.
1807
+ Rule B: The Science & Engineering Boundary
1808
+ Observational (Moderate): Habitats, Water Cycle, observable traits, simple definitions.
1809
+ Mechanistic/Theoretical (Very): Engineering mechanics (how propulsion works via reaction), Instrumentation (using a spectroscope), or Chemical/Atomic theory.
1810
+ Test: Does the text explain how a machine functions using physical principles? If yes, it is Very Complex.
1811
+ Rule C: The History/Social Studies Boundary
1812
+ General/Narrative (Moderate):
1813
+ Sensory: Battle descriptions focusing on sights/sounds (flashes, smoke).
1814
+ Standard Topics: Immigration, Slavery, Government, Geography. Lists of nationalities or religions are "Common Knowledge" for Grades 6-8.
1815
+ Political/Contextual (Very):
1816
+ Implicit Context: Texts assuming knowledge of specific political factions, treaties, or the causes of events without explanation (e.g., "The Allies," "The Front," "The secularization of the clergy").
1817
+ Test: If the reader must know why two groups are fighting or the specific political history of a revolution to understand the text, it is Very Complex.
1818
+ Rule D: The "Technical vs. Practical" Trap
1819
+ Scenario: A text teaches a technical skill (e.g., Technical Drawing/Technology) but applies it to a familiar object (a cupboard).
1820
+ Decision: Slightly Complex.
1821
+ Reasoning: Do not confuse "Technical Vocabulary" (scale, thick lines) with "Theoretical Complexity." If the underlying concept is familiar (furniture), the SMK load is low.
1822
+
1823
+ 3. Critical Calibration Examples
1824
+ Text: "Make a rough sketch... How many shelves should the cupboard have?" (Grade 2) -> Slightly Complex.
1825
+ Reasoning: (Rule D/Rule A) Although it mentions "scale" and "technology," the task is concrete and relies on everyday knowledge.
1826
+ Text: "Hydraulic propulsion works by sucking water at the bow and forcing it sternward." (Grade 10) -> Very Complex.
1827
+ Reasoning: (Rule B) Explains a mechanism using physics principles.
1828
+ Text: "The Allies fight the enemy's cavalry; we remember the hospitality to priests during the Revolution." (Grade 6) -> Very Complex.
1829
+ Reasoning: (Rule C) Assumes undefined knowledge of WWI alliances and the specific political history of the French Revolution.
1830
+ Text: "Immigrants from Poland, Italy, and Russia arrived. Most were Catholic or Orthodox." (Grade 7) -> Moderately Complex.
1831
+ Reasoning: (Rule C) Standard K-8 topic. Lists of nationalities are content vocabulary, not specialized theory.
1832
+
1833
+ 4. Output Format
1834
+ Return your analysis in a valid JSON object. Do not include markdown formatting.
1835
+ Keys:
1836
+ - identified_topics: List[str] identifying the core subjects.
1837
+ - curriculum_check: String explaining if the topics are "Standard/General" (typical for K-8) or "Specialized/High School" (typical for 9-12).
1838
+ - assumptions_and_scaffolding: String analyzing what the author assumes the reader knows vs what is explained.
1839
+ - friction_analysis: String discussing the gap between Concrete description and Abstract meaning.
1840
+ - complexity_score: String (exactly one of: "Slightly complex", "Moderately complex", "Very complex", "Exceedingly complex").
1841
+ - reasoning: String synthesizing the decision.
1842
+
1843
+ `;
1844
+
1845
+ // ../../evals/prompts/subject-matter-knowledge/user.txt
1846
+ var user_default2 = "Analyze:\nText: {text}\nGrade: {grade}\nFK Score: {fk_score}";
1847
+
1848
+ // src/prompts/subject-matter-knowledge/index.ts
1849
+ function getSystemPrompt3() {
1850
+ return system_default2;
1851
+ }
1852
+ function getUserPrompt3(text, grade, fkScore) {
1853
+ return user_default2.replaceAll("{text}", text).replaceAll("{grade}", grade).replaceAll("{fk_score}", fkScore.toString());
1854
+ }
1855
+
1856
+ // src/evaluators/smk.ts
1857
+ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
1858
+ static metadata = {
1859
+ id: "subject-matter-knowledge",
1860
+ name: "Subject Matter Knowledge",
1861
+ description: "Evaluates background knowledge demands of educational texts relative to grade level",
1862
+ supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
1863
+ requiresGoogleKey: true,
1864
+ requiresOpenAIKey: false
1865
+ };
1866
+ provider;
1867
+ constructor(config) {
1868
+ super(config);
1869
+ this.provider = createProvider({
1870
+ type: "google",
1871
+ model: "gemini-3-flash-preview",
1872
+ apiKey: config.googleApiKey,
1873
+ maxRetries: this.config.maxRetries
1874
+ });
1875
+ }
1876
+ /**
1877
+ * Evaluate subject matter knowledge complexity for a given text and grade level
1878
+ *
1879
+ * @param text - The text to evaluate
1880
+ * @param grade - The target grade level (3-12)
1881
+ * @returns Evaluation result with complexity score and detailed analysis
1882
+ * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
1883
+ * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1884
+ */
1885
+ async evaluate(text, grade) {
1886
+ this.logger.info("Starting SMK evaluation", {
1887
+ evaluator: "subject-matter-knowledge",
1888
+ operation: "evaluate",
1889
+ grade,
1890
+ textLength: text.length
1891
+ });
1892
+ const startTime = Date.now();
1893
+ const stageDetails = [];
1894
+ try {
1895
+ this.validateText(text);
1896
+ this.validateGrade(grade, new Set(_SmkEvaluator.metadata.supportedGrades));
1897
+ this.logger.debug("Evaluating subject matter knowledge complexity", {
1898
+ evaluator: "subject-matter-knowledge",
1899
+ operation: "smk_evaluation"
1900
+ });
1901
+ const fkScore = calculateFleschKincaidGrade(text);
1902
+ const response = await this.evaluateSmk(text, grade, fkScore);
1903
+ stageDetails.push({
1904
+ stage: "smk_evaluation",
1905
+ provider: "google:gemini-3-flash-preview",
1906
+ latency_ms: response.latencyMs,
1907
+ token_usage: {
1908
+ input_tokens: response.usage.inputTokens,
1909
+ output_tokens: response.usage.outputTokens
1910
+ }
1911
+ });
1912
+ const latencyMs = Date.now() - startTime;
1913
+ const totalTokenUsage = {
1914
+ input_tokens: stageDetails.reduce((sum, s) => sum + (s.token_usage?.input_tokens || 0), 0),
1915
+ output_tokens: stageDetails.reduce((sum, s) => sum + (s.token_usage?.output_tokens || 0), 0)
1916
+ };
1917
+ const result = {
1918
+ score: response.data.complexity_score,
1919
+ reasoning: response.data.reasoning,
1920
+ metadata: {
1921
+ model: "google:gemini-3-flash-preview",
1922
+ processingTimeMs: latencyMs
1923
+ },
1924
+ _internal: response.data
1925
+ };
1926
+ this.sendTelemetry({
1927
+ status: "success",
1928
+ latencyMs,
1929
+ textLength: text.length,
1930
+ grade,
1931
+ provider: "google:gemini-3-flash-preview",
1932
+ tokenUsage: totalTokenUsage,
1933
+ metadata: {
1934
+ stage_details: stageDetails
1935
+ },
1936
+ inputText: text
1937
+ }).catch(() => {
1938
+ });
1939
+ this.logger.info("SMK evaluation completed successfully", {
1940
+ evaluator: "subject-matter-knowledge",
1941
+ operation: "evaluate",
1942
+ grade,
1943
+ score: result.score,
1944
+ processingTimeMs: latencyMs
1945
+ });
1946
+ return result;
1947
+ } catch (error) {
1948
+ const latencyMs = Date.now() - startTime;
1949
+ this.logger.error("SMK evaluation failed", {
1950
+ evaluator: "subject-matter-knowledge",
1951
+ operation: "evaluate",
1952
+ grade,
1953
+ error: error instanceof Error ? error : void 0,
1954
+ processingTimeMs: latencyMs,
1955
+ completedStages: stageDetails.length
1956
+ });
1957
+ const totalTokenUsage = stageDetails.length > 0 ? {
1958
+ input_tokens: stageDetails.reduce((sum, s) => sum + (s.token_usage?.input_tokens || 0), 0),
1959
+ output_tokens: stageDetails.reduce((sum, s) => sum + (s.token_usage?.output_tokens || 0), 0)
1960
+ } : void 0;
1961
+ this.sendTelemetry({
1962
+ status: "error",
1963
+ latencyMs,
1964
+ textLength: text.length,
1965
+ grade,
1966
+ provider: "google:gemini-3-flash-preview",
1967
+ tokenUsage: totalTokenUsage,
1968
+ errorCode: error instanceof Error ? error.name : "UnknownError",
1969
+ metadata: stageDetails.length > 0 ? { stage_details: stageDetails } : void 0,
1970
+ inputText: text
1971
+ }).catch(() => {
1972
+ });
1973
+ if (error instanceof ValidationError) {
1974
+ throw error;
1975
+ }
1976
+ throw wrapProviderError(error, "SMK evaluation failed");
1977
+ }
1978
+ }
1979
+ /**
1980
+ * Run the SMK evaluation LLM call
1981
+ */
1982
+ async evaluateSmk(text, grade, fkScore) {
1983
+ const response = await this.provider.generateStructured({
1984
+ messages: [
1985
+ { role: "system", content: getSystemPrompt3() },
1986
+ { role: "user", content: getUserPrompt3(text, grade, fkScore) }
1987
+ ],
1988
+ schema: SmkOutputSchema,
1989
+ temperature: 0
1990
+ });
1991
+ return {
1992
+ data: response.data,
1993
+ usage: response.usage,
1994
+ latencyMs: response.latencyMs
1995
+ };
1996
+ }
1997
+ };
1998
+ async function evaluateSmk(text, grade, config) {
1999
+ const evaluator = new SmkEvaluator(config);
2000
+ return evaluator.evaluate(text, grade);
2001
+ }
1764
2002
  var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluator {
1765
2003
  static metadata = {
1766
2004
  id: "text-complexity",
1767
2005
  name: "Text Complexity",
1768
- description: "Composite evaluator analyzing vocabulary and sentence structure complexity",
2006
+ description: "Composite evaluator analyzing vocabulary, sentence structure, and subject matter knowledge complexity",
1769
2007
  supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
1770
2008
  requiresGoogleKey: true,
1771
2009
  requiresOpenAIKey: true
1772
2010
  };
1773
2011
  vocabularyEvaluator;
1774
2012
  sentenceStructureEvaluator;
2013
+ smkEvaluator;
1775
2014
  limit;
1776
2015
  constructor(config) {
1777
2016
  super(config);
1778
2017
  this.vocabularyEvaluator = new VocabularyEvaluator(config);
1779
2018
  this.sentenceStructureEvaluator = new SentenceStructureEvaluator(config);
2019
+ this.smkEvaluator = new SmkEvaluator(config);
1780
2020
  this.limit = pLimit(3);
1781
2021
  }
1782
2022
  /**
1783
2023
  * Evaluate text complexity for a given text and grade level
1784
2024
  *
1785
- * Runs vocabulary and sentence structure evaluations in parallel with concurrency control.
1786
- * If both sub-evaluators fail, throws an error. Otherwise returns a result map where
2025
+ * Runs vocabulary, sentence structure, and SMK evaluations in parallel with concurrency control.
2026
+ * If all three sub-evaluators fail, throws an error. Otherwise returns a result map where
1787
2027
  * failed sub-evaluators are represented as `{ error: Error }`.
1788
2028
  *
1789
2029
  * @param text - The text to evaluate
@@ -1802,18 +2042,21 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
1802
2042
  this.validateText(text);
1803
2043
  this.validateGrade(grade, new Set(_TextComplexityEvaluator.metadata.supportedGrades));
1804
2044
  const startTime = Date.now();
1805
- const [vocabResult, sentenceResult] = await Promise.all([
2045
+ const [vocabResult, sentenceResult, smkResult] = await Promise.all([
1806
2046
  this.limit(() => this.runSubEvaluator(this.vocabularyEvaluator, text, grade)),
1807
- this.limit(() => this.runSubEvaluator(this.sentenceStructureEvaluator, text, grade))
2047
+ this.limit(() => this.runSubEvaluator(this.sentenceStructureEvaluator, text, grade)),
2048
+ this.limit(() => this.runSubEvaluator(this.smkEvaluator, text, grade))
1808
2049
  ]);
1809
2050
  const latencyMs = Date.now() - startTime;
1810
2051
  const vocabFailed = "error" in vocabResult;
1811
2052
  const sentenceFailed = "error" in sentenceResult;
1812
- const hasFailures = vocabFailed || sentenceFailed;
2053
+ const smkFailed = "error" in smkResult;
2054
+ const hasFailures = vocabFailed || sentenceFailed || smkFailed;
1813
2055
  if (hasFailures) {
1814
2056
  const errors = [];
1815
2057
  if (vocabFailed) errors.push(`Vocabulary: ${vocabResult.error.message}`);
1816
2058
  if (sentenceFailed) errors.push(`Sentence structure: ${sentenceResult.error.message}`);
2059
+ if (smkFailed) errors.push(`Subject matter knowledge: ${smkResult.error.message}`);
1817
2060
  this.logger.error("Text complexity evaluation completed with errors", {
1818
2061
  evaluator: "text-complexity",
1819
2062
  operation: "evaluate",
@@ -1821,7 +2064,7 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
1821
2064
  errors,
1822
2065
  processingTimeMs: latencyMs
1823
2066
  });
1824
- if (vocabFailed && sentenceFailed) {
2067
+ if (vocabFailed && sentenceFailed && smkFailed) {
1825
2068
  throw new Error(`Text complexity evaluation failed: ${errors.join("; ")}`);
1826
2069
  }
1827
2070
  }
@@ -1842,7 +2085,7 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
1842
2085
  processingTimeMs: latencyMs,
1843
2086
  hasFailures
1844
2087
  });
1845
- return { vocabulary: vocabResult, sentenceStructure: sentenceResult };
2088
+ return { vocabulary: vocabResult, sentenceStructure: sentenceResult, subjectMatterKnowledge: smkResult };
1846
2089
  }
1847
2090
  /**
1848
2091
  * Run a sub-evaluator with error handling.
@@ -1861,6 +2104,6 @@ async function evaluateTextComplexity(text, grade, config) {
1861
2104
  return evaluator.evaluate(text, grade);
1862
2105
  }
1863
2106
 
1864
- export { APIError, AuthenticationError, ComplexityClassificationSchema, ConfigurationError, EvaluatorError, GradeBand, GradeLevelAppropriatenessEvaluator, GradeLevelAppropriatenessSchema, LogLevel, NetworkError, RateLimitError, SentenceAnalysisSchema, SentenceStructureEvaluator, TextComplexityEvaluator, TextComplexityLevel, TimeoutError, ValidationError, VocabularyEvaluator, addEngineeredFeatures, calculateFleschKincaidGrade, calculateReadabilityMetrics, evaluateGradeLevelAppropriateness, evaluateSentenceStructure, evaluateTextComplexity, evaluateVocabulary, featuresToJSON };
2107
+ export { APIError, AuthenticationError, ComplexityClassificationSchema, ConfigurationError, EvaluatorError, GradeBand, GradeLevelAppropriatenessEvaluator, GradeLevelAppropriatenessSchema, LogLevel, NetworkError, RateLimitError, SentenceAnalysisSchema, SentenceStructureEvaluator, SmkEvaluator, TextComplexityEvaluator, TextComplexityLevel, TimeoutError, ValidationError, VocabularyEvaluator, addEngineeredFeatures, calculateFleschKincaidGrade, calculateReadabilityMetrics, evaluateGradeLevelAppropriateness, evaluateSentenceStructure, evaluateSmk, evaluateTextComplexity, evaluateVocabulary, featuresToJSON };
1865
2108
  //# sourceMappingURL=index.js.map
1866
2109
  //# sourceMappingURL=index.js.map