@learning-commons/evaluators 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +161 -14
- package/dist/index.cjs +471 -8
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +192 -5
- package/dist/index.d.ts +192 -5
- package/dist/index.js +468 -9
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -1761,29 +1761,482 @@ async function evaluateGradeLevelAppropriateness(text, config) {
|
|
|
1761
1761
|
const evaluator = new GradeLevelAppropriatenessEvaluator(config);
|
|
1762
1762
|
return evaluator.evaluate(text);
|
|
1763
1763
|
}
|
|
1764
|
+
/**
 * Structured-output schema for the subject matter knowledge (SMK) LLM call.
 * Field order and descriptions are part of the contract sent to the model.
 */
var SmkOutputSchema = (() => {
  // Every free-text field is a plain string carrying only a description.
  const describedString = (description) => z.string().describe(description);
  return z.object({
    identified_topics: z.array(z.string()).describe("List of major subjects/concepts found in the text."),
    curriculum_check: describedString("Whether the topics are standard K-8 or specialized high school level."),
    assumptions_and_scaffolding: describedString("What the author assumes the reader knows vs. what is explained."),
    friction_analysis: describedString("Whether difficulty comes from vocabulary/structure or actual knowledge demands."),
    complexity_score: TextComplexityLevel.describe("The subject matter knowledge complexity level of the text"),
    reasoning: describedString("A brief synthesis of why the text fits the chosen complexity level.")
  });
})();
|
|
1772
|
+
|
|
1773
|
+
// ../../evals/prompts/subject-matter-knowledge/system.txt
|
|
1774
|
+
var system_default2 = `
|
|
1775
|
+
To perform the task of evaluating text complexity based on Subject Matter Knowledge (SMK), strictly adhere to the following instructions.
|
|
1776
|
+
Role
|
|
1777
|
+
You are an expert K-12 Literacy Pedagogue and Text Complexity Evaluator. Your specific focus is analyzing Subject Matter Knowledge (SMK) demands according to the Common Core Qualitative Text Complexity Rubric.
|
|
1778
|
+
Objective
|
|
1779
|
+
Analyze a provided text relative to a target grade_level. You must determine the extent of background knowledge required to comprehend the text. You must distinguish between Common/Standard knowledge (generally lower/moderate complexity) and Specialized/Theoretical knowledge (generally higher complexity).
|
|
1780
|
+
Input Data
|
|
1781
|
+
text: The passage to analyze.
|
|
1782
|
+
grade_level: The target student grade (integer).
|
|
1783
|
+
fk_score: Flesch-Kincaid Grade Level. Note: Use this only as a loose proxy for sentence structure. Do not let a high FK score artificially inflate the Subject Matter Knowledge score if the concepts remain simple.
|
|
1784
|
+
|
|
1785
|
+
1. The Rubric: Subject Matter Knowledge (SMK)
|
|
1786
|
+
1. Slightly Complex
|
|
1787
|
+
Scope: Everyday, practical knowledge, and Introduction to Skills.
|
|
1788
|
+
Concept Type: Concrete, directly observable, and familiar.
|
|
1789
|
+
Key Indicator: "How-to" texts involving familiar objects (e.g., drawing a cupboard, playing a game, family life). Even if specific terms (like "scale" or "measure") are used, if the application is on a common object, it remains Slightly Complex.
|
|
1790
|
+
2. Moderately Complex
|
|
1791
|
+
Scope: Common Discipline-Specific Knowledge or Narrative History.
|
|
1792
|
+
Definition: Topics widely introduced in K-8 curricula (Basic American History, Geography, Earth Science, Biology).
|
|
1793
|
+
Key Characteristic: The text bridges concrete descriptions with abstract themes (e.g., using farming to discuss justice), OR narrates historical events via sensory details.
|
|
1794
|
+
Spatial Reasoning: Texts requiring mental manipulation of maps/routes are generally Moderate, unless the object is a familiar household item (see Slightly Complex).
|
|
1795
|
+
3. Very Complex
|
|
1796
|
+
Scope: Specialized Discipline-Specific, Engineering Mechanics, or Political Theory.
|
|
1797
|
+
Definition: Topics characteristic of High School (9-12) curricula requiring abstract mental models.
|
|
1798
|
+
Key Characteristic: Requires understanding mechanisms (how physics works/propulsion), chemical composition, or undefined political stakes (specific treaties, alliances, or secularization without context).
|
|
1799
|
+
4. Exceedingly Complex
|
|
1800
|
+
Scope: Professional or Academic knowledge.
|
|
1801
|
+
|
|
1802
|
+
2. The Expert Mental Model (Decision Logic)
|
|
1803
|
+
Use these refined rules to categorize cases.
|
|
1804
|
+
Rule A: The "Layers of Meaning" Check
|
|
1805
|
+
Concrete -> Abstract (Moderate): The text describes concrete things (farming) to argue an abstract point (justice, rights).
|
|
1806
|
+
Concrete -> Concrete (Slightly): The text describes concrete things (lines, paper) to achieve a concrete result (drawing a cupboard). Do not over-rank practical instructions.
|
|
1807
|
+
Rule B: The Science & Engineering Boundary
|
|
1808
|
+
Observational (Moderate): Habitats, Water Cycle, observable traits, simple definitions.
|
|
1809
|
+
Mechanistic/Theoretical (Very): Engineering mechanics (how propulsion works via reaction), Instrumentation (using a spectroscope), or Chemical/Atomic theory.
|
|
1810
|
+
Test: Does the text explain how a machine functions using physical principles? If yes, it is Very Complex.
|
|
1811
|
+
Rule C: The History/Social Studies Boundary
|
|
1812
|
+
General/Narrative (Moderate):
|
|
1813
|
+
Sensory: Battle descriptions focusing on sights/sounds (flashes, smoke).
|
|
1814
|
+
Standard Topics: Immigration, Slavery, Government, Geography. Lists of nationalities or religions are "Common Knowledge" for Grades 6-8.
|
|
1815
|
+
Political/Contextual (Very):
|
|
1816
|
+
Implicit Context: Texts assuming knowledge of specific political factions, treaties, or the causes of events without explanation (e.g., "The Allies," "The Front," "The secularization of the clergy").
|
|
1817
|
+
Test: If the reader must know why two groups are fighting or the specific political history of a revolution to understand the text, it is Very Complex.
|
|
1818
|
+
Rule D: The "Technical vs. Practical" Trap
|
|
1819
|
+
Scenario: A text teaches a technical skill (e.g., Technical Drawing/Technology) but applies it to a familiar object (a cupboard).
|
|
1820
|
+
Decision: Slightly Complex.
|
|
1821
|
+
Reasoning: Do not confuse "Technical Vocabulary" (scale, thick lines) with "Theoretical Complexity." If the underlying concept is familiar (furniture), the SMK load is low.
|
|
1822
|
+
|
|
1823
|
+
3. Critical Calibration Examples
|
|
1824
|
+
Text: "Make a rough sketch... How many shelves should the cupboard have?" (Grade 2) -> Slightly Complex.
|
|
1825
|
+
Reasoning: (Rule D/Rule A) Although it mentions "scale" and "technology," the task is concrete and relies on everyday knowledge.
|
|
1826
|
+
Text: "Hydraulic propulsion works by sucking water at the bow and forcing it sternward." (Grade 10) -> Very Complex.
|
|
1827
|
+
Reasoning: (Rule B) Explains a mechanism using physics principles.
|
|
1828
|
+
Text: "The Allies fight the enemy's cavalry; we remember the hospitality to priests during the Revolution." (Grade 6) -> Very Complex.
|
|
1829
|
+
Reasoning: (Rule C) Assumes undefined knowledge of WWI alliances and the specific political history of the French Revolution.
|
|
1830
|
+
Text: "Immigrants from Poland, Italy, and Russia arrived. Most were Catholic or Orthodox." (Grade 7) -> Moderately Complex.
|
|
1831
|
+
Reasoning: (Rule C) Standard K-8 topic. Lists of nationalities are content vocabulary, not specialized theory.
|
|
1832
|
+
|
|
1833
|
+
4. Output Format
|
|
1834
|
+
Return your analysis in a valid JSON object. Do not include markdown formatting.
|
|
1835
|
+
Keys:
|
|
1836
|
+
- identified_topics: List[str] identifying the core subjects.
|
|
1837
|
+
- curriculum_check: String explaining if the topics are "Standard/General" (typical for K-8) or "Specialized/High School" (typical for 9-12).
|
|
1838
|
+
- assumptions_and_scaffolding: String analyzing what the author assumes the reader knows vs what is explained.
|
|
1839
|
+
- friction_analysis: String discussing the gap between Concrete description and Abstract meaning.
|
|
1840
|
+
- complexity_score: String (One of: slightly_complex, moderately_complex, very_complex, exceedingly_complex).
|
|
1841
|
+
- reasoning: String synthesizing the decision.
|
|
1842
|
+
|
|
1843
|
+
`;
|
|
1844
|
+
|
|
1845
|
+
// ../../evals/prompts/subject-matter-knowledge/user.txt
|
|
1846
|
+
var user_default2 = "Analyze:\nText: {text}\nGrade: {grade}\nFK Score: {fk_score}";
|
|
1847
|
+
|
|
1848
|
+
// src/prompts/subject-matter-knowledge/index.ts
|
|
1849
|
+
/**
 * Returns the system prompt for the subject matter knowledge evaluator.
 *
 * @returns The inlined `system_default2` prompt text, unmodified.
 */
function getSystemPrompt3() {
  const systemPrompt = system_default2;
  return systemPrompt;
}
|
|
1852
|
+
/**
 * Builds the user message for the subject matter knowledge evaluator by
 * filling the `{text}`, `{grade}` and `{fk_score}` placeholders of the
 * `user_default2` template.
 *
 * All placeholders are substituted in a single pass with a replacer function.
 * This matters because the evaluated text is arbitrary input:
 *  - a replacer function's return value is inserted literally, so `$&`, `$$`,
 *    `` $` `` and `$'` inside the text are not treated as replacement patterns;
 *  - substituted values are never rescanned, so a literal `{grade}` or
 *    `{fk_score}` inside the text is left as the author wrote it.
 *
 * @param text - The passage to evaluate.
 * @param grade - The target grade; coerced to a string.
 * @param fkScore - The Flesch-Kincaid grade score; rendered with `toString()`.
 * @returns The template with all three placeholders filled in.
 */
function getUserPrompt3(text, grade, fkScore) {
  const values = {
    text: String(text),
    grade: String(grade),
    fk_score: fkScore.toString()
  };
  return user_default2.replace(/\{(text|grade|fk_score)\}/g, (_placeholder, key) => values[key]);
}
|
|
1855
|
+
|
|
1856
|
+
// src/evaluators/smk.ts
|
|
1857
|
+
/**
 * Evaluator for the subject matter knowledge (SMK) dimension of text
 * complexity. It makes one structured LLM call through a Google provider and
 * reports the result, latency and token usage to the logger and telemetry.
 */
var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
  static metadata = {
    id: "subject-matter-knowledge",
    name: "Subject Matter Knowledge",
    description: "Evaluates background knowledge demands of educational texts relative to grade level",
    supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
    requiresGoogleKey: true,
    requiresOpenAIKey: false
  };
  // Single source of truth for the model id and the "type:model" label used in
  // stage details, result metadata and telemetry.
  static #MODEL = "gemini-3-flash-preview";
  static #PROVIDER_LABEL = `google:${_SmkEvaluator.#MODEL}`;
  provider;
  /**
   * @param config - Evaluator configuration. `googleApiKey` is handed to the
   *   provider; `maxRetries` is read from `this.config` after the base
   *   constructor has run.
   */
  constructor(config) {
    super(config);
    this.provider = createProvider({
      type: "google",
      model: _SmkEvaluator.#MODEL,
      apiKey: config.googleApiKey,
      maxRetries: this.config.maxRetries
    });
  }
  /**
   * Sums input and output token counts over the recorded stages. A stage
   * without usage data contributes 0.
   */
  static #sumTokenUsage(stages) {
    return {
      input_tokens: stages.reduce((sum, s) => sum + (s.token_usage?.input_tokens || 0), 0),
      output_tokens: stages.reduce((sum, s) => sum + (s.token_usage?.output_tokens || 0), 0)
    };
  }
  /**
   * Evaluate subject matter knowledge complexity for a given text and grade level
   *
   * @param text - The text to evaluate
   * @param grade - The target grade level (3-12)
   * @returns Evaluation result with complexity score and detailed analysis
   * @throws {ValidationError} Rethrown unchanged when raised inside the try block
   *   (validateText / validateGrade are called there)
   * @throws Any other error is passed through `wrapProviderError` with the
   *   message "SMK evaluation failed" before being thrown
   */
  async evaluate(text, grade) {
    // Read the length defensively. Reading `text.length` directly threw a raw
    // TypeError for null/undefined input before the try block was entered (and
    // again inside the catch handler), bypassing validation, telemetry and
    // error wrapping. With the guard, invalid input reaches validateText.
    const textLength = typeof text === "string" ? text.length : 0;
    this.logger.info("Starting SMK evaluation", {
      evaluator: "subject-matter-knowledge",
      operation: "evaluate",
      grade,
      textLength
    });
    const startTime = Date.now();
    const stageDetails = [];
    try {
      this.validateText(text);
      this.validateGrade(grade, new Set(_SmkEvaluator.metadata.supportedGrades));
      this.logger.debug("Evaluating subject matter knowledge complexity", {
        evaluator: "subject-matter-knowledge",
        operation: "smk_evaluation"
      });
      const fkScore = calculateFleschKincaidGrade(text);
      const response = await this.evaluateSmk(text, grade, fkScore);
      stageDetails.push({
        stage: "smk_evaluation",
        provider: _SmkEvaluator.#PROVIDER_LABEL,
        latency_ms: response.latencyMs,
        token_usage: {
          input_tokens: response.usage.inputTokens,
          output_tokens: response.usage.outputTokens
        }
      });
      const latencyMs = Date.now() - startTime;
      const totalTokenUsage = _SmkEvaluator.#sumTokenUsage(stageDetails);
      const result = {
        score: response.data.complexity_score,
        reasoning: response.data.reasoning,
        metadata: {
          model: _SmkEvaluator.#PROVIDER_LABEL,
          processingTimeMs: latencyMs
        },
        _internal: response.data
      };
      // Telemetry is fire-and-forget: it is not awaited and a rejection is
      // deliberately ignored so it cannot affect the evaluation result.
      this.sendTelemetry({
        status: "success",
        latencyMs,
        textLength,
        grade,
        provider: _SmkEvaluator.#PROVIDER_LABEL,
        tokenUsage: totalTokenUsage,
        metadata: {
          stage_details: stageDetails
        },
        inputText: text
      }).catch(() => {
      });
      this.logger.info("SMK evaluation completed successfully", {
        evaluator: "subject-matter-knowledge",
        operation: "evaluate",
        grade,
        score: result.score,
        processingTimeMs: latencyMs
      });
      return result;
    } catch (error) {
      const latencyMs = Date.now() - startTime;
      this.logger.error("SMK evaluation failed", {
        evaluator: "subject-matter-knowledge",
        operation: "evaluate",
        grade,
        error: error instanceof Error ? error : void 0,
        processingTimeMs: latencyMs,
        completedStages: stageDetails.length
      });
      // Token usage and stage details are only reported when at least one
      // stage completed before the failure.
      const hasStages = stageDetails.length > 0;
      const totalTokenUsage = hasStages ? _SmkEvaluator.#sumTokenUsage(stageDetails) : void 0;
      this.sendTelemetry({
        status: "error",
        latencyMs,
        textLength,
        grade,
        provider: _SmkEvaluator.#PROVIDER_LABEL,
        tokenUsage: totalTokenUsage,
        errorCode: error instanceof Error ? error.name : "UnknownError",
        metadata: hasStages ? { stage_details: stageDetails } : void 0,
        inputText: text
      }).catch(() => {
      });
      if (error instanceof ValidationError) {
        throw error;
      }
      throw wrapProviderError(error, "SMK evaluation failed");
    }
  }
  /**
   * Run the SMK evaluation LLM call
   *
   * Sends the system and user prompts to the provider with temperature 0 and
   * `SmkOutputSchema` as the structured-output schema.
   *
   * @returns The provider response narrowed to `{ data, usage, latencyMs }`
   */
  async evaluateSmk(text, grade, fkScore) {
    const response = await this.provider.generateStructured({
      messages: [
        { role: "system", content: getSystemPrompt3() },
        { role: "user", content: getUserPrompt3(text, grade, fkScore) }
      ],
      schema: SmkOutputSchema,
      temperature: 0
    });
    return {
      data: response.data,
      usage: response.usage,
      latencyMs: response.latencyMs
    };
  }
};
|
|
1998
|
+
/**
 * Functional shortcut for a one-off SMK evaluation: constructs a fresh
 * `SmkEvaluator` from `config` and evaluates `text` for `grade`.
 *
 * @param text - The text to evaluate
 * @param grade - The target grade level
 * @param config - Configuration forwarded to the `SmkEvaluator` constructor
 * @returns Whatever `SmkEvaluator#evaluate` resolves to
 */
async function evaluateSmk(text, grade, config) {
  return new SmkEvaluator(config).evaluate(text, grade);
}
|
|
2002
|
+
/**
 * Structured-output schema for the conventionality LLM call.
 * Field order and descriptions are part of the contract sent to the model.
 */
var ConventionalityOutputSchema = (() => {
  // Every free-text field is a plain string carrying only a description.
  const describedString = (description) => z.string().describe(description);
  return z.object({
    conventionality_features: z.array(z.string()).describe("The specific language features driving the complexity (e.g., literal narrative, concrete actions, sustained irony, abstract qualities) with direct quotes from the text."),
    grade_context: describedString("How the conventionality demands compare to general expectations for the provided target grade."),
    instructional_insights: describedString("Actionable pedagogical suggestions for scaffolding the conventionality features in the classroom."),
    complexity_score: TextComplexityLevel.describe("The conventionality complexity level of the text"),
    reasoning: describedString("A detailed explanation of the rating, citing specific features in the text and referencing the expert guardrails.")
  });
})();
|
|
2009
|
+
|
|
2010
|
+
// ../../evals/prompts/conventionality/system.txt
|
|
2011
|
+
var system_default3 = `Role
|
|
2012
|
+
You are an expert reading teacher and text complexity evaluator. Your task is to evaluate the "Conventionality" of a text and assign it a complexity level based on a 4-point scale, carefully factoring in the target grade level.
|
|
2013
|
+
|
|
2014
|
+
Objective
|
|
2015
|
+
Measure how explicit, literal, and straightforward the text's meaning is, versus how abstract, ironic, figurative, or archaic it is. Focus on the hiddenness of the meaning, the use of conceptual framing, the reliance on abstract reasoning, and the familiarity of the expression for the target grade.
|
|
2016
|
+
|
|
2017
|
+
Complexity Levels
|
|
2018
|
+
- Slightly Complex: Explicit, literal, straightforward, easy to understand. Meaning is entirely on the surface. The language is concrete, and the meaning is clear and procedural, mostly referring to observable materials and actions. Contains no symbolic or ironic language, and conceptual interpretation is not required. Contains limited figurative language that is common and easy to comprehend at the target grade level.
|
|
2019
|
+
- Moderately Complex: Largely explicit and easy to understand with some occasions for more complex meaning. May contain a noticeable amount of archaic/dated phrasing, formal historical prose, vocabulary demands, background knowledge requirements, or expressions that are less familiar to the target grade level, which might make the text feel vague or slightly challenging.
|
|
2020
|
+
- Very Complex: Fairly complex; contains sustained abstract language, conceptual framing, rhetorical idealization, ironic comparisons, or central metaphors that drive the meaning of the text. Addresses concepts, beliefs, and abstract qualities rather than just concrete objects. The tone or underlying message requires interpretation, even if the surface message is clear.
|
|
2021
|
+
- Exceedingly Complex: Dense and complex; contains considerable abstract, ironic, and/or figurative language. Meaning is heavily hidden, deeply conceptual, or relies heavily on complex rhetorical devices.
|
|
2022
|
+
|
|
2023
|
+
Essential Evaluation Rules
|
|
2024
|
+
1. Concrete & Procedural Texts: Texts that are highly concrete, clear, and procedural (e.g., describing observable materials, mechanical processes, or physical actions) should typically be rated "Slightly Complex."
|
|
2025
|
+
|
|
2026
|
+
2. Grade-Level Anchoring and Vague Narratives: Always consider the target grade. A literal historical narrative that might be straightforward for older students can be "Moderately Complex" for younger students (e.g., 4th graders) if it involves less familiar expressions, older contexts (e.g., wagon loads, traveling by horseback), vocabulary demands, and background knowledge requirements that make the text feel vague or slightly demanding for that age group.
|
|
2027
|
+
|
|
2028
|
+
3. Rhetorical Idealization and Abstract Qualities: If an entire argument or narrative is built around abstract qualities (e.g., national character, bravery, liberty) and uses repeated figurative language or personification to portray a subject in a certain idealized way, rate the text as "Very Complex." Even if the figurative language is easy to interpret, the need to interpret the rhetorical tone and sustained abstract focus elevates the complexity beyond level two.
|
|
2029
|
+
|
|
2030
|
+
4. Common Idioms and Grade-Level Appropriateness: Do NOT elevate a text to "Moderately Complex" simply because it contains a few common idiomatic expressions. If these expressions are widely known and easy for the target grade to understand without making the text feel vague, the text remains "Slightly Complex."
|
|
2031
|
+
|
|
2032
|
+
5. Conversational and Hypothetical Framing: Using a second-person conversational hook (e.g., "Imagine you are...") to explain a concept is a standard, literal device for engaging readers. It does not constitute complex conceptual framing.
|
|
2033
|
+
|
|
2034
|
+
6. Sustained vs. Occasional Impact: If abstract language, figurative phrasing, irony, or conceptual framing is sustained throughout the text and central to the argument/meaning, the text is Very Complex. Reserve Moderately Complex for texts where the explicit meaning dominates but the expression, vocabulary, or archaic language provides a moderate conventionality challenge.
|
|
2035
|
+
|
|
2036
|
+
7. Central Metaphors and Conceptual Framing: When an author uses a central metaphor to explain a concept or uses figurative phrasing to explain how things "work," this abstract reasoning drives the meaning, elevating the text to Very Complex.
|
|
2037
|
+
|
|
2038
|
+
8. Irony and Abstract Comparisons: Texts that rely on sustained irony, especially through comparative arguments, are inherently Very Complex for younger students.
|
|
2039
|
+
|
|
2040
|
+
9. Isolate Conventionality from Vocabulary: Do not inflate the Conventionality score just because the text uses archaic, dated, or highly academic vocabulary.
|
|
2041
|
+
|
|
2042
|
+
Input Format
|
|
2043
|
+
You will receive:
|
|
2044
|
+
- text: The passage to evaluate.
|
|
2045
|
+
- grade_level: The target student grade level.
|
|
2046
|
+
- fk_score: The Flesch-Kincaid readability score.
|
|
2047
|
+
|
|
2048
|
+
Output Format
|
|
2049
|
+
Provide a JSON object containing ONLY the following keys:
|
|
2050
|
+
- complexity_score: (String) One of the 4 scale levels exactly as formatted: 'slightly_complex', 'moderately_complex', 'very_complex', or 'exceedingly_complex'.
|
|
2051
|
+
- reasoning: (String) A detailed explanation of the rating, citing specific features in the text and referencing the expert guardrails (e.g., noting if the text relies on abstract qualities/rhetorical idealization, if vocabulary/background knowledge demands make a literal text vague for the grade level, or if it is strictly concrete/procedural).
|
|
2052
|
+
- conventionality_features: (List of Strings) The specific language features driving the complexity (e.g., literal narrative, concrete actions, less familiar expressions, sustained irony, abstract qualities, rhetorical idealization, archaic phrasing) with direct quotes from the text.
|
|
2053
|
+
- grade_context: (String) How the conventionality demands compare to general expectations for the provided target grade.
|
|
2054
|
+
- instructional_insights: (String) Actionable pedagogical suggestions for scaffolding the conventionality features in the classroom.`;
|
|
2055
|
+
|
|
2056
|
+
// ../../evals/prompts/conventionality/user.txt
|
|
2057
|
+
var user_default3 = "Analyze:\nText: {text}\nGrade: {grade}\nFK Score: {fk_score}";
|
|
2058
|
+
|
|
2059
|
+
// src/prompts/conventionality/index.ts
|
|
2060
|
+
/**
 * Returns the system prompt for the conventionality evaluator.
 *
 * @returns The inlined `system_default3` prompt text, unmodified.
 */
function getSystemPrompt4() {
  const systemPrompt = system_default3;
  return systemPrompt;
}
|
|
2063
|
+
/**
 * Builds the user message for the conventionality evaluator by filling the
 * `{text}`, `{grade}` and `{fk_score}` placeholders of the `user_default3`
 * template.
 *
 * All placeholders are substituted in a single pass with a replacer function.
 * This matters because the evaluated text is arbitrary input:
 *  - a replacer function's return value is inserted literally, so `$&`, `$$`,
 *    `` $` `` and `$'` inside the text are not treated as replacement patterns;
 *  - substituted values are never rescanned, so a literal `{grade}` or
 *    `{fk_score}` inside the text is left as the author wrote it.
 *
 * @param text - The passage to evaluate.
 * @param grade - The target grade; coerced to a string.
 * @param fkScore - The Flesch-Kincaid grade score; rendered with `toString()`.
 * @returns The template with all three placeholders filled in.
 */
function getUserPrompt4(text, grade, fkScore) {
  const values = {
    text: String(text),
    grade: String(grade),
    fk_score: fkScore.toString()
  };
  return user_default3.replace(/\{(text|grade|fk_score)\}/g, (_placeholder, key) => values[key]);
}
|
|
2066
|
+
|
|
2067
|
+
// src/evaluators/conventionality.ts
|
|
2068
|
+
/**
 * Evaluator for the conventionality dimension of text complexity. It makes
 * one structured LLM call through a Google provider and reports the result,
 * latency and token usage to the logger and telemetry.
 */
var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvaluator {
  static metadata = {
    id: "conventionality",
    name: "Conventionality",
    description: "Evaluates how explicit, literal, and straightforward a text's meaning is relative to grade level",
    supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
    requiresGoogleKey: true,
    requiresOpenAIKey: false
  };
  // Single source of truth for the model id and the "type:model" label used in
  // stage details, result metadata and telemetry.
  static #MODEL = "gemini-3-flash-preview";
  static #PROVIDER_LABEL = `google:${_ConventionalityEvaluator.#MODEL}`;
  provider;
  /**
   * @param config - Evaluator configuration. `googleApiKey` is handed to the
   *   provider; `maxRetries` is read from `this.config` after the base
   *   constructor has run.
   */
  constructor(config) {
    super(config);
    this.provider = createProvider({
      type: "google",
      model: _ConventionalityEvaluator.#MODEL,
      apiKey: config.googleApiKey,
      maxRetries: this.config.maxRetries
    });
  }
  /**
   * Sums input and output token counts over the recorded stages. A stage
   * without usage data contributes 0.
   */
  static #sumTokenUsage(stages) {
    return {
      input_tokens: stages.reduce((sum, s) => sum + (s.token_usage?.input_tokens || 0), 0),
      output_tokens: stages.reduce((sum, s) => sum + (s.token_usage?.output_tokens || 0), 0)
    };
  }
  /**
   * Evaluate conventionality complexity for a given text and grade level
   *
   * @param text - The text to evaluate
   * @param grade - The target grade level (3-12)
   * @returns Evaluation result with complexity score and detailed analysis
   * @throws {ValidationError} Rethrown unchanged when raised inside the try block
   *   (validateText / validateGrade are called there)
   * @throws Any other error is passed through `wrapProviderError` with the
   *   message "Conventionality evaluation failed" before being thrown
   */
  async evaluate(text, grade) {
    // Read the length defensively. Reading `text.length` directly threw a raw
    // TypeError for null/undefined input before the try block was entered (and
    // again inside the catch handler), bypassing validation, telemetry and
    // error wrapping. With the guard, invalid input reaches validateText.
    const textLength = typeof text === "string" ? text.length : 0;
    this.logger.info("Starting Conventionality evaluation", {
      evaluator: "conventionality",
      operation: "evaluate",
      grade,
      textLength
    });
    const startTime = Date.now();
    const stageDetails = [];
    try {
      this.validateText(text);
      this.validateGrade(grade, new Set(_ConventionalityEvaluator.metadata.supportedGrades));
      this.logger.debug("Evaluating conventionality complexity", {
        evaluator: "conventionality",
        operation: "conventionality_evaluation"
      });
      const fkScore = calculateFleschKincaidGrade(text);
      const response = await this.evaluateConventionality(text, grade, fkScore);
      stageDetails.push({
        stage: "conventionality_evaluation",
        provider: _ConventionalityEvaluator.#PROVIDER_LABEL,
        latency_ms: response.latencyMs,
        token_usage: {
          input_tokens: response.usage.inputTokens,
          output_tokens: response.usage.outputTokens
        }
      });
      const latencyMs = Date.now() - startTime;
      const totalTokenUsage = _ConventionalityEvaluator.#sumTokenUsage(stageDetails);
      const result = {
        score: response.data.complexity_score,
        reasoning: response.data.reasoning,
        metadata: {
          model: _ConventionalityEvaluator.#PROVIDER_LABEL,
          processingTimeMs: latencyMs
        },
        _internal: response.data
      };
      // Telemetry is fire-and-forget: it is not awaited and a rejection is
      // deliberately ignored so it cannot affect the evaluation result.
      this.sendTelemetry({
        status: "success",
        latencyMs,
        textLength,
        grade,
        provider: _ConventionalityEvaluator.#PROVIDER_LABEL,
        tokenUsage: totalTokenUsage,
        metadata: {
          stage_details: stageDetails
        },
        inputText: text
      }).catch(() => {
      });
      this.logger.info("Conventionality evaluation completed successfully", {
        evaluator: "conventionality",
        operation: "evaluate",
        grade,
        score: result.score,
        processingTimeMs: latencyMs
      });
      return result;
    } catch (error) {
      const latencyMs = Date.now() - startTime;
      this.logger.error("Conventionality evaluation failed", {
        evaluator: "conventionality",
        operation: "evaluate",
        grade,
        error: error instanceof Error ? error : void 0,
        processingTimeMs: latencyMs,
        completedStages: stageDetails.length
      });
      // Token usage and stage details are only reported when at least one
      // stage completed before the failure.
      const hasStages = stageDetails.length > 0;
      const totalTokenUsage = hasStages ? _ConventionalityEvaluator.#sumTokenUsage(stageDetails) : void 0;
      this.sendTelemetry({
        status: "error",
        latencyMs,
        textLength,
        grade,
        provider: _ConventionalityEvaluator.#PROVIDER_LABEL,
        tokenUsage: totalTokenUsage,
        errorCode: error instanceof Error ? error.name : "UnknownError",
        metadata: hasStages ? { stage_details: stageDetails } : void 0,
        inputText: text
      }).catch(() => {
      });
      if (error instanceof ValidationError) {
        throw error;
      }
      throw wrapProviderError(error, "Conventionality evaluation failed");
    }
  }
  /**
   * Run the Conventionality evaluation LLM call
   *
   * Sends the system and user prompts to the provider with temperature 0 and
   * `ConventionalityOutputSchema` as the structured-output schema.
   *
   * @returns The provider response narrowed to `{ data, usage, latencyMs }`
   */
  async evaluateConventionality(text, grade, fkScore) {
    const response = await this.provider.generateStructured({
      messages: [
        { role: "system", content: getSystemPrompt4() },
        { role: "user", content: getUserPrompt4(text, grade, fkScore) }
      ],
      schema: ConventionalityOutputSchema,
      temperature: 0
    });
    return {
      data: response.data,
      usage: response.usage,
      latencyMs: response.latencyMs
    };
  }
};
|
|
2209
|
+
/**
 * Functional shortcut for a one-off conventionality evaluation: constructs a
 * fresh `ConventionalityEvaluator` from `config` and evaluates `text` for
 * `grade`.
 *
 * @param text - The text to evaluate
 * @param grade - The target grade level
 * @param config - Configuration forwarded to the `ConventionalityEvaluator` constructor
 * @returns Whatever `ConventionalityEvaluator#evaluate` resolves to
 */
async function evaluateConventionality(text, grade, config) {
  return new ConventionalityEvaluator(config).evaluate(text, grade);
}
|
|
1764
2213
|
var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluator {
|
|
1765
2214
|
static metadata = {
|
|
1766
2215
|
id: "text-complexity",
|
|
1767
2216
|
name: "Text Complexity",
|
|
1768
|
-
description: "Composite evaluator analyzing vocabulary
|
|
2217
|
+
description: "Composite evaluator analyzing vocabulary, sentence structure, subject matter knowledge, and conventionality complexity",
|
|
1769
2218
|
supportedGrades: ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"],
|
|
1770
2219
|
requiresGoogleKey: true,
|
|
1771
2220
|
requiresOpenAIKey: true
|
|
1772
2221
|
};
|
|
1773
2222
|
vocabularyEvaluator;
|
|
1774
2223
|
sentenceStructureEvaluator;
|
|
2224
|
+
smkEvaluator;
|
|
2225
|
+
conventionalityEvaluator;
|
|
1775
2226
|
limit;
|
|
1776
2227
|
constructor(config) {
|
|
1777
2228
|
super(config);
|
|
1778
2229
|
this.vocabularyEvaluator = new VocabularyEvaluator(config);
|
|
1779
2230
|
this.sentenceStructureEvaluator = new SentenceStructureEvaluator(config);
|
|
2231
|
+
this.smkEvaluator = new SmkEvaluator(config);
|
|
2232
|
+
this.conventionalityEvaluator = new ConventionalityEvaluator(config);
|
|
1780
2233
|
this.limit = pLimit(3);
|
|
1781
2234
|
}
|
|
1782
2235
|
/**
|
|
1783
2236
|
* Evaluate text complexity for a given text and grade level
|
|
1784
2237
|
*
|
|
1785
|
-
* Runs vocabulary
|
|
1786
|
-
* If
|
|
2238
|
+
* Runs vocabulary, sentence structure, SMK, and conventionality evaluations in parallel with concurrency control.
|
|
2239
|
+
* If all four sub-evaluators fail, throws an error. Otherwise returns a result map where
|
|
1787
2240
|
* failed sub-evaluators are represented as `{ error: Error }`.
|
|
1788
2241
|
*
|
|
1789
2242
|
* @param text - The text to evaluate
|
|
@@ -1802,18 +2255,24 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
|
|
|
1802
2255
|
this.validateText(text);
|
|
1803
2256
|
this.validateGrade(grade, new Set(_TextComplexityEvaluator.metadata.supportedGrades));
|
|
1804
2257
|
const startTime = Date.now();
|
|
1805
|
-
const [vocabResult, sentenceResult] = await Promise.all([
|
|
2258
|
+
const [vocabResult, sentenceResult, smkResult, conventionalityResult] = await Promise.all([
|
|
1806
2259
|
this.limit(() => this.runSubEvaluator(this.vocabularyEvaluator, text, grade)),
|
|
1807
|
-
this.limit(() => this.runSubEvaluator(this.sentenceStructureEvaluator, text, grade))
|
|
2260
|
+
this.limit(() => this.runSubEvaluator(this.sentenceStructureEvaluator, text, grade)),
|
|
2261
|
+
this.limit(() => this.runSubEvaluator(this.smkEvaluator, text, grade)),
|
|
2262
|
+
this.limit(() => this.runSubEvaluator(this.conventionalityEvaluator, text, grade))
|
|
1808
2263
|
]);
|
|
1809
2264
|
const latencyMs = Date.now() - startTime;
|
|
1810
2265
|
const vocabFailed = "error" in vocabResult;
|
|
1811
2266
|
const sentenceFailed = "error" in sentenceResult;
|
|
1812
|
-
const
|
|
2267
|
+
const smkFailed = "error" in smkResult;
|
|
2268
|
+
const conventionalityFailed = "error" in conventionalityResult;
|
|
2269
|
+
const hasFailures = vocabFailed || sentenceFailed || smkFailed || conventionalityFailed;
|
|
1813
2270
|
if (hasFailures) {
|
|
1814
2271
|
const errors = [];
|
|
1815
2272
|
if (vocabFailed) errors.push(`Vocabulary: ${vocabResult.error.message}`);
|
|
1816
2273
|
if (sentenceFailed) errors.push(`Sentence structure: ${sentenceResult.error.message}`);
|
|
2274
|
+
if (smkFailed) errors.push(`Subject matter knowledge: ${smkResult.error.message}`);
|
|
2275
|
+
if (conventionalityFailed) errors.push(`Conventionality: ${conventionalityResult.error.message}`);
|
|
1817
2276
|
this.logger.error("Text complexity evaluation completed with errors", {
|
|
1818
2277
|
evaluator: "text-complexity",
|
|
1819
2278
|
operation: "evaluate",
|
|
@@ -1821,7 +2280,7 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
|
|
|
1821
2280
|
errors,
|
|
1822
2281
|
processingTimeMs: latencyMs
|
|
1823
2282
|
});
|
|
1824
|
-
if (vocabFailed && sentenceFailed) {
|
|
2283
|
+
if (vocabFailed && sentenceFailed && smkFailed && conventionalityFailed) {
|
|
1825
2284
|
throw new Error(`Text complexity evaluation failed: ${errors.join("; ")}`);
|
|
1826
2285
|
}
|
|
1827
2286
|
}
|
|
@@ -1842,7 +2301,7 @@ var TextComplexityEvaluator = class _TextComplexityEvaluator extends BaseEvaluat
|
|
|
1842
2301
|
processingTimeMs: latencyMs,
|
|
1843
2302
|
hasFailures
|
|
1844
2303
|
});
|
|
1845
|
-
return { vocabulary: vocabResult, sentenceStructure: sentenceResult };
|
|
2304
|
+
return { vocabulary: vocabResult, sentenceStructure: sentenceResult, subjectMatterKnowledge: smkResult, conventionality: conventionalityResult };
|
|
1846
2305
|
}
|
|
1847
2306
|
/**
|
|
1848
2307
|
* Run a sub-evaluator with error handling.
|
|
@@ -1861,6 +2320,6 @@ async function evaluateTextComplexity(text, grade, config) {
|
|
|
1861
2320
|
return evaluator.evaluate(text, grade);
|
|
1862
2321
|
}
|
|
1863
2322
|
|
|
1864
|
-
export { APIError, AuthenticationError, ComplexityClassificationSchema, ConfigurationError, EvaluatorError, GradeBand, GradeLevelAppropriatenessEvaluator, GradeLevelAppropriatenessSchema, LogLevel, NetworkError, RateLimitError, SentenceAnalysisSchema, SentenceStructureEvaluator, TextComplexityEvaluator, TextComplexityLevel, TimeoutError, ValidationError, VocabularyEvaluator, addEngineeredFeatures, calculateFleschKincaidGrade, calculateReadabilityMetrics, evaluateGradeLevelAppropriateness, evaluateSentenceStructure, evaluateTextComplexity, evaluateVocabulary, featuresToJSON };
|
|
2323
|
+
export { APIError, AuthenticationError, ComplexityClassificationSchema, ConfigurationError, ConventionalityEvaluator, EvaluatorError, GradeBand, GradeLevelAppropriatenessEvaluator, GradeLevelAppropriatenessSchema, LogLevel, NetworkError, RateLimitError, SentenceAnalysisSchema, SentenceStructureEvaluator, SmkEvaluator, TextComplexityEvaluator, TextComplexityLevel, TimeoutError, ValidationError, VocabularyEvaluator, addEngineeredFeatures, calculateFleschKincaidGrade, calculateReadabilityMetrics, evaluateConventionality, evaluateGradeLevelAppropriateness, evaluateSentenceStructure, evaluateSmk, evaluateTextComplexity, evaluateVocabulary, featuresToJSON };
|
|
1865
2324
|
//# sourceMappingURL=index.js.map
|
|
1866
2325
|
//# sourceMappingURL=index.js.map
|