npm - @learning-commons/evaluators - Versions diffs - 0.1.0 - Mend

@learning-commons/evaluators 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/index.d.cts ADDED Viewed

@@ -0,0 +1,1142 @@
+import { z } from 'zod';
+/**
+ * Shared complexity levels used across all text complexity evaluators
+ * (Vocabulary, Sentence Structure, and any future sub-evaluators)
+ *
+ * The Zod enum lists the four level strings; the type of the same name is
+ * the union of those string literals, derived with `z.infer`.
+ */
+declare const TextComplexityLevel: z.ZodEnum<["Slightly complex", "Moderately complex", "Very complex", "Exceedingly complex"]>;
+type TextComplexityLevel = z.infer<typeof TextComplexityLevel>;
+/**
+ * Metadata attached to all evaluation results
+ *
+ * - `model`: model identifier string (its exact format is not visible in these declarations)
+ * - `processingTimeMs`: processing time; presumably milliseconds, per the `Ms` suffix - confirm
+ */
+interface EvaluationMetadata {
+    model: string;
+    processingTimeMs: number;
+}
+/**
+ * Base evaluation result structure
+ *
+ * @typeParam TScore - Type of the `score` value (defaults to `string`)
+ * @typeParam TInternal - Type of the `_internal` payload (defaults to `unknown`)
+ *
+ * `_internal` is optional, so consumers must handle `undefined` before reading it.
+ */
+interface EvaluationResult<TScore = string, TInternal = unknown> {
+    score: TScore;
+    reasoning: string;
+    metadata: EvaluationMetadata;
+    _internal?: TInternal;
+}
+/**
+ * Error type for failed evaluations
+ *
+ * Pairs an `error` string with the `input` it relates to:
+ * `input.text` is required, `input.grade` is optional.
+ */
+interface EvaluationError {
+    error: string;
+    input: {
+        text: string;
+        grade?: string;
+    };
+}
+/**
+ * Valid grade bands for grade level appropriateness evaluation
+ *
+ * The `GradeBand` type is the union of the six band strings, derived with `z.infer`.
+ */
+declare const GradeBand: z.ZodEnum<["K-1", "2-3", "4-5", "6-8", "9-10", "11-CCR"]>;
+type GradeBand = z.infer<typeof GradeBand>;
+/**
+ * Output schema for Grade Level Appropriateness evaluation
+ * Matches Python OutputRanges model
+ *
+ * `grade` and `alternative_grade` accept the same six band strings as `GradeBand`;
+ * `reasoning` and `scaffolding_needed` are plain strings.
+ * `GradeLevelAppropriatenessInternal` is the type inferred from this schema with `z.infer`.
+ */
+declare const GradeLevelAppropriatenessSchema: z.ZodObject<{
+    reasoning: z.ZodString;
+    grade: z.ZodEnum<["K-1", "2-3", "4-5", "6-8", "9-10", "11-CCR"]>;
+    alternative_grade: z.ZodEnum<["K-1", "2-3", "4-5", "6-8", "9-10", "11-CCR"]>;
+    scaffolding_needed: z.ZodString;
+}, "strip", z.ZodTypeAny, {
+    reasoning: string;
+    grade: "K-1" | "2-3" | "4-5" | "6-8" | "9-10" | "11-CCR";
+    alternative_grade: "K-1" | "2-3" | "4-5" | "6-8" | "9-10" | "11-CCR";
+    scaffolding_needed: string;
+}, {
+    reasoning: string;
+    grade: "K-1" | "2-3" | "4-5" | "6-8" | "9-10" | "11-CCR";
+    alternative_grade: "K-1" | "2-3" | "4-5" | "6-8" | "9-10" | "11-CCR";
+    scaffolding_needed: string;
+}>;
+type GradeLevelAppropriatenessInternal = z.infer<typeof GradeLevelAppropriatenessSchema>;
+/**
+ * Custom error types for the Evaluators SDK
+ *
+ * This module provides a hierarchy of error types to help users
+ * distinguish between different error scenarios and implement
+ * appropriate error handling strategies.
+ */
+/**
+ * Base error class for all evaluator errors
+ *
+ * Extends the built-in `Error` and exposes an optional, read-only `code` string.
+ * The constructor takes a `message` and an optional `code`.
+ */
+declare class EvaluatorError extends Error {
+    readonly code?: string | undefined;
+    constructor(message: string, code?: string | undefined);
+}
+/**
+ * Configuration error - thrown when the evaluator is misconfigured
+ * These are developer errors (e.g. missing API keys) that should NOT be retried
+ *
+ * Subclass of `EvaluatorError`; the constructor takes a `message` only.
+ *
+ * @example
+ * ```typescript
+ * try {
+ *   const evaluator = new VocabularyEvaluator({ googleApiKey: '' });
+ * } catch (error) {
+ *   if (error instanceof ConfigurationError) {
+ *     console.error('Check your evaluator config:', error.message);
+ *   }
+ * }
+ * ```
+ */
+declare class ConfigurationError extends EvaluatorError {
+    constructor(message: string);
+}
+/**
+ * Validation error - thrown when input validation fails
+ * These are client-side errors that should NOT be retried
+ *
+ * Subclass of `EvaluatorError`; the constructor takes a `message` only.
+ *
+ * @example
+ * ```typescript
+ * try {
+ *   await evaluator.evaluate('', '5');
+ * } catch (error) {
+ *   if (error instanceof ValidationError) {
+ *     // Show user-friendly error message
+ *     console.error('Invalid input:', error.message);
+ *   }
+ * }
+ * ```
+ */
+declare class ValidationError extends EvaluatorError {
+    constructor(message: string);
+}
+/**
+ * Base API error - thrown when LLM API calls fail
+ * Contains additional context about the API error:
+ * - `statusCode`: optional numeric status (the subclass docs refer to HTTP status codes)
+ * - `retryable`: boolean flag; each subclass documents whether it should be retried
+ *
+ * Every constructor argument after `message` is optional.
+ */
+declare class APIError extends EvaluatorError {
+    readonly statusCode?: number | undefined;
+    readonly retryable: boolean;
+    constructor(message: string, statusCode?: number | undefined, retryable?: boolean, code?: string);
+}
+/**
+ * Authentication error - thrown when API keys are invalid or missing
+ * HTTP 401 or 403 responses
+ * Should NOT be retried
+ *
+ * Subclass of `APIError`; the constructor takes a `message` and an optional `statusCode`.
+ *
+ * @example
+ * ```typescript
+ * try {
+ *   await evaluator.evaluate(text, grade);
+ * } catch (error) {
+ *   if (error instanceof AuthenticationError) {
+ *     // Prompt user to check API keys
+ *     console.error('Invalid API keys. Please check your credentials.');
+ *   }
+ * }
+ * ```
+ */
+declare class AuthenticationError extends APIError {
+    constructor(message: string, statusCode?: number);
+}
+/**
+ * Rate limit error - thrown when API rate limits are exceeded
+ * HTTP 429 responses
+ * Should be retried with exponential backoff
+ *
+ * `retryAfter` is optional, so callers need a fallback delay (as in the example).
+ * NOTE(review): the unit of `retryAfter` (seconds vs. milliseconds) is not stated
+ * in these declarations - confirm before passing it to a timer.
+ *
+ * @example
+ * ```typescript
+ * try {
+ *   await evaluator.evaluate(text, grade);
+ * } catch (error) {
+ *   if (error instanceof RateLimitError) {
+ *     // Wait and retry
+ *     await sleep(error.retryAfter || 5000);
+ *     // retry...
+ *   }
+ * }
+ * ```
+ */
+declare class RateLimitError extends APIError {
+    readonly retryAfter?: number | undefined;
+    constructor(message: string, retryAfter?: number | undefined);
+}
+/**
+ * Network error - thrown when network requests fail
+ * Connection timeouts, DNS failures, etc.
+ * May be retryable depending on the scenario
+ *
+ * Subclass of `APIError`; the constructor takes a `message` and an optional
+ * `retryable` flag (its default is not visible in these declarations).
+ *
+ * @example
+ * ```typescript
+ * try {
+ *   await evaluator.evaluate(text, grade);
+ * } catch (error) {
+ *   if (error instanceof NetworkError) {
+ *     // Check network connection and retry
+ *     console.error('Network error:', error.message);
+ *   }
+ * }
+ * ```
+ */
+declare class NetworkError extends APIError {
+    constructor(message: string, retryable?: boolean);
+}
+/**
+ * Timeout error - thrown when requests exceed timeout limits
+ * Should be retried with caution
+ *
+ * Subclass of `APIError`; the constructor's `message` argument is optional.
+ *
+ * @example
+ * ```typescript
+ * try {
+ *   await evaluator.evaluate(text, grade);
+ * } catch (error) {
+ *   if (error instanceof TimeoutError) {
+ *     // Retry with longer timeout or smaller text
+ *     console.error('Request timed out');
+ *   }
+ * }
+ * ```
+ */
+declare class TimeoutError extends APIError {
+    constructor(message?: string);
+}
+/**
+ * Logging interface for the Evaluators SDK
+ *
+ * Provides structured logging with verbosity levels.
+ * Users can inject custom loggers or use the default console logger.
+ */
+/**
+ * Log levels in order of verbosity
+ *
+ * Numeric values run from the most verbose level (DEBUG = 0) to SILENT = 4,
+ * which disables logging.
+ */
+declare enum LogLevel {
+    /** Debug messages - very verbose, for development */
+    DEBUG = 0,
+    /** Informational messages - normal operations */
+    INFO = 1,
+    /** Warning messages - potentially problematic situations */
+    WARN = 2,
+    /** Error messages - errors that need attention */
+    ERROR = 3,
+    /** Silent - no logging */
+    SILENT = 4
+}
+/**
+ * Context object for structured logging
+ *
+ * All named fields are optional. The index signature additionally allows any
+ * other string key, typed as `unknown`.
+ */
+interface LogContext {
+    /** Evaluator type (vocabulary, sentence-structure, etc.) */
+    evaluator?: string;
+    /** Current operation or stage */
+    operation?: string;
+    /** Error object if applicable */
+    error?: Error;
+    /** Additional metadata */
+    [key: string]: unknown;
+}
+/**
+ * Logger interface
+ *
+ * Implement this interface to provide custom logging behavior.
+ * Each of the four methods takes a message string plus an optional
+ * `LogContext` and returns nothing.
+ *
+ * @example
+ * ```typescript
+ * const customLogger: Logger = {
+ *   debug: (msg, ctx) => myLogger.debug(msg, ctx),
+ *   info: (msg, ctx) => myLogger.info(msg, ctx),
+ *   warn: (msg, ctx) => myLogger.warn(msg, ctx),
+ *   error: (msg, ctx) => myLogger.error(msg, ctx),
+ * };
+ *
+ * const evaluator = new VocabularyEvaluator({
+ *   googleApiKey: '...',
+ *   openaiApiKey: '...',
+ *   logger: customLogger,
+ *   logLevel: LogLevel.INFO,
+ * });
+ * ```
+ */
+interface Logger {
+    /**
+     * Log debug message
+     * Used for detailed debugging information
+     */
+    debug(message: string, context?: LogContext): void;
+    /**
+     * Log informational message
+     * Used for normal operations
+     */
+    info(message: string, context?: LogContext): void;
+    /**
+     * Log warning message
+     * Used for potentially problematic situations
+     */
+    warn(message: string, context?: LogContext): void;
+    /**
+     * Log error message
+     * Used for errors that need attention
+     */
+    error(message: string, context?: LogContext): void;
+}
+/**
+ * Message format for LLM conversations
+ *
+ * `role` is one of 'system', 'user' or 'assistant'; `content` is the message text.
+ */
+interface Message {
+    role: 'system' | 'user' | 'assistant';
+    content: string;
+}
+/**
+ * Request configuration for structured LLM generation
+ *
+ * @typeParam T - Type described by `schema`
+ *
+ * Only `messages` and `schema` are required; `temperature`, `maxTokens`
+ * and `model` are optional.
+ */
+interface LLMRequest<T> {
+    messages: Message[];
+    schema: z.ZodSchema<T>;
+    temperature?: number;
+    maxTokens?: number;
+    model?: string;
+}
+/**
+ * Response from LLM with usage metadata
+ *
+ * @typeParam T - Type of the structured `data` payload
+ *
+ * `usage` reports input and output token counts; `latencyMs` is presumably
+ * measured in milliseconds, per the `Ms` suffix - confirm.
+ */
+interface LLMResponse<T> {
+    data: T;
+    model: string;
+    usage: {
+        inputTokens: number;
+        outputTokens: number;
+    };
+    latencyMs: number;
+}
+/**
+ * Response from plain text generation
+ *
+ * Carries the generated `text` with the same `usage` and `latencyMs` fields as
+ * `LLMResponse`, but has no `model` field.
+ */
+interface TextGenerationResponse {
+    text: string;
+    usage: {
+        inputTokens: number;
+        outputTokens: number;
+    };
+    latencyMs: number;
+}
+/**
+ * Base interface for LLM provider implementations
+ *
+ * Both methods are asynchronous and resolve to a response object that
+ * includes token usage and latency.
+ */
+interface LLMProvider {
+    /**
+     * Generate structured output from LLM using Zod schema
+     *
+     * @param request - Messages, the Zod schema for the result, and optional generation settings
+     * @returns The structured data typed as `T`, wrapped in an `LLMResponse`
+     */
+    generateStructured<T>(request: LLMRequest<T>): Promise<LLMResponse<T>>;
+    /**
+     * Generate plain text from LLM
+     *
+     * @param messages - Conversation messages to send
+     * @param temperature - Optional sampling temperature
+     * @returns The generated text wrapped in a `TextGenerationResponse`
+     */
+    generateText(messages: Message[], temperature?: number): Promise<TextGenerationResponse>;
+}
+/**
+ * Configuration for LLM provider
+ *
+ * Only `type` is required. `customProvider` presumably applies when `type`
+ * is 'custom' - confirm against the implementation.
+ */
+interface ProviderConfig {
+    type: 'openai' | 'anthropic' | 'google' | 'custom';
+    apiKey?: string;
+    model?: string;
+    temperature?: number;
+    baseURL?: string;
+    customProvider?: LLMProvider;
+    maxRetries?: number;
+}
+/**
+ * Stage 1: Detailed sentence analysis output (a `reasoning` string plus 36 metric fields)
+ * Ported from Python SentenceAnalysesEvaluatorOutput
+ *
+ * Every metric is a number except `sentence_word_counts`, which is an array of numbers.
+ * `SentenceAnalysis` is the type inferred from this schema with `z.infer`.
+ */
+declare const SentenceAnalysisSchema: z.ZodObject<{
+    reasoning: z.ZodString;
+    num_sentences: z.ZodNumber;
+    num_words: z.ZodNumber;
+    flesch_kincaid_grade: z.ZodNumber;
+    num_simple_sentences: z.ZodNumber;
+    num_compound_sentences: z.ZodNumber;
+    num_complex_sentences: z.ZodNumber;
+    num_compound_complex_sentences: z.ZodNumber;
+    num_other_sentences: z.ZodNumber;
+    num_independent_clauses: z.ZodNumber;
+    num_subordinate_clauses: z.ZodNumber;
+    num_total_clauses: z.ZodNumber;
+    num_sentences_with_subordinate: z.ZodNumber;
+    num_sentences_with_multiple_subordinates: z.ZodNumber;
+    num_sentences_with_embedded_clauses: z.ZodNumber;
+    num_prepositional_phrases: z.ZodNumber;
+    num_participle_phrases: z.ZodNumber;
+    num_appositive_phrases: z.ZodNumber;
+    num_simple_transitions: z.ZodNumber;
+    num_sophisticated_transitions: z.ZodNumber;
+    words_in_simple_sentences: z.ZodNumber;
+    words_in_compound_sentences: z.ZodNumber;
+    words_in_complex_sentences: z.ZodNumber;
+    words_in_compound_complex_sentences: z.ZodNumber;
+    words_in_other_sentences: z.ZodNumber;
+    sentence_word_counts: z.ZodArray<z.ZodNumber, "many">;
+    num_one_concept_sentences: z.ZodNumber;
+    num_multi_concept_sentences: z.ZodNumber;
+    num_cleft_sentences: z.ZodNumber;
+    max_clauses_in_any_sentence: z.ZodNumber;
+    num_compound: z.ZodNumber;
+    num_basic_complex: z.ZodNumber;
+    num_advanced_complex: z.ZodNumber;
+    percentage_simple: z.ZodNumber;
+    percentage_compound: z.ZodNumber;
+    percentage_basic_complex: z.ZodNumber;
+    percentage_advanced_complex: z.ZodNumber;
+}, "strip", z.ZodTypeAny, {
+    reasoning: string;
+    num_sentences: number;
+    num_words: number;
+    flesch_kincaid_grade: number;
+    num_simple_sentences: number;
+    num_compound_sentences: number;
+    num_complex_sentences: number;
+    num_compound_complex_sentences: number;
+    num_other_sentences: number;
+    num_independent_clauses: number;
+    num_subordinate_clauses: number;
+    num_total_clauses: number;
+    num_sentences_with_subordinate: number;
+    num_sentences_with_multiple_subordinates: number;
+    num_sentences_with_embedded_clauses: number;
+    num_prepositional_phrases: number;
+    num_participle_phrases: number;
+    num_appositive_phrases: number;
+    num_simple_transitions: number;
+    num_sophisticated_transitions: number;
+    words_in_simple_sentences: number;
+    words_in_compound_sentences: number;
+    words_in_complex_sentences: number;
+    words_in_compound_complex_sentences: number;
+    words_in_other_sentences: number;
+    sentence_word_counts: number[];
+    num_one_concept_sentences: number;
+    num_multi_concept_sentences: number;
+    num_cleft_sentences: number;
+    max_clauses_in_any_sentence: number;
+    num_compound: number;
+    num_basic_complex: number;
+    num_advanced_complex: number;
+    percentage_simple: number;
+    percentage_compound: number;
+    percentage_basic_complex: number;
+    percentage_advanced_complex: number;
+}, {
+    reasoning: string;
+    num_sentences: number;
+    num_words: number;
+    flesch_kincaid_grade: number;
+    num_simple_sentences: number;
+    num_compound_sentences: number;
+    num_complex_sentences: number;
+    num_compound_complex_sentences: number;
+    num_other_sentences: number;
+    num_independent_clauses: number;
+    num_subordinate_clauses: number;
+    num_total_clauses: number;
+    num_sentences_with_subordinate: number;
+    num_sentences_with_multiple_subordinates: number;
+    num_sentences_with_embedded_clauses: number;
+    num_prepositional_phrases: number;
+    num_participle_phrases: number;
+    num_appositive_phrases: number;
+    num_simple_transitions: number;
+    num_sophisticated_transitions: number;
+    words_in_simple_sentences: number;
+    words_in_compound_sentences: number;
+    words_in_complex_sentences: number;
+    words_in_compound_complex_sentences: number;
+    words_in_other_sentences: number;
+    sentence_word_counts: number[];
+    num_one_concept_sentences: number;
+    num_multi_concept_sentences: number;
+    num_cleft_sentences: number;
+    max_clauses_in_any_sentence: number;
+    num_compound: number;
+    num_basic_complex: number;
+    num_advanced_complex: number;
+    percentage_simple: number;
+    percentage_compound: number;
+    percentage_basic_complex: number;
+    percentage_advanced_complex: number;
+}>;
+type SentenceAnalysis = z.infer<typeof SentenceAnalysisSchema>;
+/**
+ * Stage 2: Final complexity classification
+ * Ported from Python ComplexityClassificationOutput
+ *
+ * `answer` accepts the same four level strings as `TextComplexityLevel`.
+ * `ComplexityClassification` is the type inferred from this schema with `z.infer`.
+ */
+declare const ComplexityClassificationSchema: z.ZodObject<{
+    reasoning: z.ZodString;
+    answer: z.ZodEnum<["Slightly complex", "Moderately complex", "Very complex", "Exceedingly complex"]>;
+}, "strip", z.ZodTypeAny, {
+    reasoning: string;
+    answer: "Slightly complex" | "Moderately complex" | "Very complex" | "Exceedingly complex";
+}, {
+    reasoning: string;
+    answer: "Slightly complex" | "Moderately complex" | "Very complex" | "Exceedingly complex";
+}>;
+type ComplexityClassification = z.infer<typeof ComplexityClassificationSchema>;
+/**
+ * Internal data structure for sentence structure evaluation
+ *
+ * Bundles the stage 1 analysis (`sentenceAnalysis`), the engineered
+ * `features`, and the stage 2 classification (`complexity`).
+ */
+interface SentenceStructureInternal {
+    sentenceAnalysis: SentenceAnalysis;
+    features: SentenceFeatures;
+    complexity: ComplexityClassification;
+}
+/**
+ * Engineered features computed from sentence analysis
+ * These are calculated in TypeScript, not requested from LLM
+ *
+ * Extends `SentenceAnalysis`, so a value of this type carries every raw
+ * analysis field in addition to the derived numeric fields declared here.
+ */
+interface SentenceFeatures extends SentenceAnalysis {
+    avg_words_per_sentence: number;
+    sentence_length_variation: number;
+    percent_short_sentences: number;
+    percent_medium_sentences: number;
+    percent_long_sentences: number;
+    percent_very_long_sentences: number;
+    percent_simple_sentences: number;
+    percent_compound_sentences: number;
+    percent_complex_sentences: number;
+    percent_compound_complex_sentences: number;
+    percent_other_sentences: number;
+    percent_words_in_simple_sentences: number;
+    percent_words_in_complex_sentences: number;
+    percent_words_in_compound_sentences: number;
+    percent_words_in_compound_complex_sentences: number;
+    percent_words_in_other_sentences: number;
+    avg_subordinates_per_sentence: number;
+    avg_clauses_per_sentence: number;
+    percent_sentences_with_subordinate: number;
+    percent_sentences_with_multiple_subordinates: number;
+    percent_sentences_with_embedded_clauses: number;
+    prep_phrase_density: number;
+    participle_phrase_density: number;
+    appositive_phrase_density: number;
+    avg_transitions_per_sentence: number;
+    percent_sophisticated_transitions: number;
+    percent_sentences_w_one_concept: number;
+    percent_sentences_w_multi_concept: number;
+    percent_cleft_sentences: number;
+}
+/**
+ * Vocabulary evaluation output schema
+ *
+ * The four word-list fields (`tier_2_words`, `tier_3_words`, `archaic_words`,
+ * `other_complex_words`) are plain strings, not arrays.
+ * `complexity_score` accepts the same four level strings as `TextComplexityLevel`.
+ * `VocabularyInternal` is the type inferred from this schema with `z.infer`.
+ */
+declare const VocabularyComplexitySchema: z.ZodObject<{
+    tier_2_words: z.ZodString;
+    tier_3_words: z.ZodString;
+    archaic_words: z.ZodString;
+    other_complex_words: z.ZodString;
+    complexity_score: z.ZodEnum<["Slightly complex", "Moderately complex", "Very complex", "Exceedingly complex"]>;
+    reasoning: z.ZodString;
+}, "strip", z.ZodTypeAny, {
+    reasoning: string;
+    tier_2_words: string;
+    tier_3_words: string;
+    archaic_words: string;
+    other_complex_words: string;
+    complexity_score: "Slightly complex" | "Moderately complex" | "Very complex" | "Exceedingly complex";
+}, {
+    reasoning: string;
+    tier_2_words: string;
+    tier_3_words: string;
+    archaic_words: string;
+    other_complex_words: string;
+    complexity_score: "Slightly complex" | "Moderately complex" | "Very complex" | "Exceedingly complex";
+}>;
+type VocabularyInternal = z.infer<typeof VocabularyComplexitySchema>;
+/**
+ * Evaluation status: either 'success' or 'error'
+ */
+type EvaluationStatus = 'success' | 'error';
+/**
+ * Token usage metrics from LLM providers
+ *
+ * Field names are snake_case here, unlike the camelCase `usage` object
+ * declared on `LLMResponse` and `TextGenerationResponse`.
+ */
+interface TokenUsage {
+    input_tokens: number;
+    output_tokens: number;
+}
+/**
+ * Per-stage details for multi-stage evaluations
+ *
+ * `stage`, `provider` and `latency_ms` are required; `token_usage` and
+ * `schema_validation_failed` are optional.
+ */
+interface StageDetail {
+    /** Stage name (e.g., "background_knowledge", "complexity_evaluation") */
+    stage: string;
+    /** Provider used for this stage (e.g., "openai:gpt-4o") */
+    provider: string;
+    /** Total latency including all retries (ms) */
+    latency_ms: number;
+    /** Token usage aggregated across all attempts */
+    token_usage?: TokenUsage;
+    /**
+     * Whether schema validation failed (indicates prompt needs clearer instructions)
+     *
+     * TODO: Not currently tracked. Vercel AI SDK abstracts validation away.
+     * To implement: Add custom retry wrapper that catches validation errors.
+     */
+    schema_validation_failed?: boolean;
+}
+/**
+ * Extensible metadata for telemetry events
+ *
+ * Currently declares a single optional field, `stage_details`.
+ */
+interface TelemetryMetadata {
+    /** Detailed breakdown by stage (for multi-stage evaluations) */
+    stage_details?: StageDetail[];
+}
+/**
+ * Telemetry event payload
+ *
+ * Optional fields: `grade`, `error_code`, `token_usage`, `metadata` and `input_text`.
+ * `timestamp` is a string whose format is not specified in these declarations.
+ * `input_text` is presumably populated only when `TelemetryOptions.recordInputs`
+ * is enabled - confirm against the implementation.
+ */
+interface TelemetryEvent {
+    timestamp: string;
+    sdk_version: string;
+    evaluator_type: string;
+    grade?: string;
+    status: EvaluationStatus;
+    error_code?: string;
+    latency_ms: number;
+    text_length_chars: number;
+    provider: string;
+    token_usage?: TokenUsage;
+    metadata?: TelemetryMetadata;
+    input_text?: string;
+}
+/**
+ * Configuration for telemetry client
+ *
+ * Every field except `partnerKey` is required, including `enabled`; the
+ * "default: true" noted on `enabled` is therefore not applied by this type.
+ * NOTE(review): it is presumably resolved by the caller before the client is
+ * constructed - confirm.
+ */
+interface TelemetryConfig {
+    /** Analytics service endpoint URL */
+    endpoint: string;
+    /** Learning Commons partner key (optional, sent as X-API-Key header) */
+    partnerKey?: string;
+    /** Client ID for anonymous tracking (persistent UUID from ~/.config/learning-commons/config.json) */
+    clientId: string;
+    /** Enable telemetry (default: true) */
+    enabled: boolean;
+    /** Logger instance (respects the SDK's configured log level and custom logger) */
+    logger: Logger;
+}
+/**
+ * Telemetry client for sending analytics events
+ *
+ * Fire-and-forget implementation that never blocks SDK operations.
+ * Errors are logged but don't fail evaluations.
+ *
+ * Constructed from a `TelemetryConfig`; `config` and `logger` are private members.
+ */
+declare class TelemetryClient {
+    private config;
+    private logger;
+    constructor(config: TelemetryConfig);
+    /**
+     * Send telemetry event to analytics service
+     *
+     * Fire-and-forget: Errors are logged but don't throw.
+     *
+     * @param event - The telemetry payload to send
+     * @returns A promise that resolves with no value
+     */
+    send(event: TelemetryEvent): Promise<void>;
+}
+/**
+ * Granular telemetry configuration options
+ *
+ * Both fields are optional. Per the field docs, telemetry is on by default
+ * while input-text recording is off by default.
+ */
+interface TelemetryOptions {
+    /** Enable telemetry (default: true) */
+    enabled?: boolean;
+    /** Record input text in telemetry (default: false) */
+    recordInputs?: boolean;
+}
+/**
+ * Base configuration for all evaluators
+ *
+ * Every field is optional at the type level; which API keys are actually
+ * needed depends on the evaluator (see `EvaluatorMetadata.requiresGoogleKey`
+ * and `requiresOpenAIKey`).
+ */
+interface BaseEvaluatorConfig {
+    /** Google API key (for evaluators using Gemini) */
+    googleApiKey?: string;
+    /** OpenAI API key (for evaluators using GPT) */
+    openaiApiKey?: string;
+    /** Learning Commons partner key for authenticated telemetry (optional) */
+    partnerKey?: string;
+    /**
+     * Maximum number of retries for failed API calls (default: 2)
+     * Set to 0 to disable retries.
+     *
+     * Note: With maxRetries=2, a failed call will be attempted up to 3 times total
+     * (1 initial attempt + 2 retries)
+     */
+    maxRetries?: number;
+    /**
+     * Telemetry configuration (default: enabled, with recordInputs: false)
+     *
+     * Can be:
+     * - `true`: Enable with defaults (recordInputs: false)
+     * - `false`: Disable completely
+     * - `TelemetryOptions`: Granular control
+     */
+    telemetry?: boolean | TelemetryOptions;
+    /**
+     * Custom logger implementation (optional)
+     * If not provided, uses console logger with specified logLevel
+     */
+    logger?: Logger;
+    /**
+     * Log level for default console logger (default: WARN)
+     * Only used if custom logger is not provided
+     *
+     * - DEBUG: Very verbose, shows all operations
+     * - INFO: Normal operations
+     * - WARN: Warnings only (default)
+     * - ERROR: Errors only
+     * - SILENT: No logging
+     */
+    logLevel?: LogLevel;
+}
+/**
+ * Evaluator metadata interface
+ * Each evaluator must provide this metadata as a static `metadata` property
+ *
+ * All fields are read-only.
+ */
+interface EvaluatorMetadata {
+    /** Unique identifier for the evaluator (e.g., 'vocabulary', 'sentence-structure') */
+    readonly id: string;
+    /** Human-readable name (e.g., 'Vocabulary', 'Sentence Structure') */
+    readonly name: string;
+    /** Brief description of what the evaluator does */
+    readonly description: string;
+    /** Supported grade levels (e.g., ['3', '4', '5', ...]) */
+    readonly supportedGrades: readonly string[];
+    /** Whether this evaluator requires a Google API key */
+    readonly requiresGoogleKey: boolean;
+    /** Whether this evaluator requires an OpenAI API key */
+    readonly requiresOpenAIKey: boolean;
+}
+/**
+ * Abstract base class for all evaluators
+ *
+ * Provides common functionality:
+ * - Telemetry setup and event sending
+ * - Text validation
+ * - Grade validation (with overridable default)
+ * - Metadata creation
+ *
+ * Concrete evaluators must implement:
+ * - static metadata: Provide evaluator metadata (see EvaluatorMetadata interface)
+ *
+ * Protected state visible to subclasses:
+ * - `telemetryClient`: optional, so it may be undefined
+ * - `logger`: always present
+ * - `config`: `maxRetries` plus a `telemetry` object in which both
+ *   `TelemetryOptions` fields are required (typed with `Required<...>`)
+ */
+declare abstract class BaseEvaluator {
+    protected telemetryClient?: TelemetryClient;
+    protected logger: Logger;
+    protected config: Required<Pick<BaseEvaluatorConfig, 'maxRetries'>> & {
+        telemetry: Required<TelemetryOptions>;
+    };
+    /**
+     * Static metadata for the evaluator
+     *
+     * Concrete evaluators MUST define this property.
+     *
+     * @example
+     * ```typescript
+     * class MyEvaluator extends BaseEvaluator {
+     *   static readonly metadata = {
+     *     id: 'my-evaluator',
+     *     name: 'My Evaluator',
+     *     description: 'Does something useful',
+     *     supportedGrades: ['3', '4', '5'],
+     *     requiresGoogleKey: true,
+     *     requiresOpenAIKey: false,
+     *   };
+     * }
+     * ```
+     */
+    static readonly metadata: EvaluatorMetadata;
+    constructor(config: BaseEvaluatorConfig);
+    /**
+     * Get metadata for this evaluator instance
+     * @throws {ConfigurationError} If the subclass has not defined static metadata
+     */
+    protected get metadata(): EvaluatorMetadata;
+    /**
+     * Validate that required API keys are provided based on metadata
+     * @throws {ConfigurationError} If required API keys are missing
+     */
+    private validateApiKeys;
+    /**
+     * Normalize telemetry config to standard format
+     */
+    private normalizeTelemetryConfig;
+    /**
+     * Get the evaluator type identifier from metadata
+     * @returns The evaluator type ID (e.g., "vocabulary", "sentence-structure")
+     */
+    protected getEvaluatorType(): string;
+    /**
+     * Validate text meets requirements
+     * Default implementation - can be overridden by concrete evaluators
+     *
+     * @param text - Text to validate
+     * @throws {ValidationError} If text is invalid
+     */
+    protected validateText(text: string): void;
+    /**
+     * Validate grade is in supported range
+     * Default implementation - can be overridden by concrete evaluators
+     *
+     * @param grade - Grade level to validate
+     * @param validGrades - Set of valid grades for this evaluator
+     * @throws {ValidationError} If grade is invalid
+     */
+    protected validateGrade(grade: string, validGrades: Set<string>): void;
+    /**
+     * Send telemetry event to analytics service
+     * Common helper for all evaluators
+     *
+     * @param params - Event details; `status`, `latencyMs`, `textLength` and
+     *   `provider` are required, the remaining fields are optional
+     * @returns A promise that resolves with no value
+     */
+    protected sendTelemetry(params: {
+        status: 'success' | 'error';
+        latencyMs: number;
+        textLength: number;
+        grade?: string;
+        provider: string;
+        errorCode?: string;
+        tokenUsage?: TokenUsage;
+        metadata?: TelemetryMetadata;
+        inputText?: string;
+    }): Promise<void>;
+}
+/**
+ * Vocabulary Evaluator
+ *
+ * Evaluates vocabulary complexity of educational texts relative to grade level.
+ * Uses a 2-stage process:
+ * 1. Generate background knowledge assumption for the student's grade level
+ * 2. Evaluate vocabulary complexity using that background knowledge
+ *
+ * Based on Qualitative Text Complexity rubric (SAP) with 4 levels:
+ * - Slightly complex
+ * - Moderately complex
+ * - Very complex
+ * - Exceedingly complex
+ *
+ * Supported grades, per the static metadata type, are "3" through "12".
+ *
+ * @example
+ * ```typescript
+ * const evaluator = new VocabularyEvaluator({
+ *   googleApiKey: process.env.GOOGLE_API_KEY,
+ *   openaiApiKey: process.env.OPENAI_API_KEY
+ * });
+ *
+ * const result = await evaluator.evaluate(text, "3");
+ * console.log(result.score); // "Moderately complex"
+ * console.log(result.reasoning);
+ * ```
+ */
+declare class VocabularyEvaluator extends BaseEvaluator {
+    static readonly metadata: {
+        id: string;
+        name: string;
+        description: string;
+        supportedGrades: readonly ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"];
+        requiresGoogleKey: boolean;
+        requiresOpenAIKey: boolean;
+    };
+    private grades34ComplexityProvider;
+    private otherGradesComplexityProvider;
+    private backgroundKnowledgeProvider;
+    constructor(config: BaseEvaluatorConfig);
+    /**
+     * Evaluate vocabulary complexity for a given text and grade level
+     *
+     * @param text - The text to evaluate
+     * @param grade - The target grade level (3-12)
+     * @returns Evaluation result with complexity score and detailed analysis
+     * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
+     * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
+     */
+    evaluate(text: string, grade: string): Promise<EvaluationResult<TextComplexityLevel, VocabularyInternal>>;
+    /**
+     * Stage 1: Generate background knowledge assumption
+     *
+     * Estimates what topics the student at the given grade level would be familiar with
+     * based on Common Core curriculum progression.
+     */
+    private getBackgroundKnowledgeAssumption;
+    /**
+     * Stage 2: Evaluate vocabulary complexity
+     *
+     * Uses the Qualitative Text Complexity rubric (SAP) and background knowledge to evaluate vocabulary complexity.
+     * Grades 3-4 use Gemini 2.5 Pro; grades 5-12 use GPT-4.1.
+     */
+    private evaluateComplexity;
+}
+/**
+ * Functional API for vocabulary evaluation
+ *
+ * @param text - The text to evaluate
+ * @param grade - The target grade level, as a string
+ * @param config - Evaluator configuration (API keys, retries, telemetry, logging)
+ * @returns The same result type as `VocabularyEvaluator.evaluate`
+ *
+ * @example
+ * ```typescript
+ * const result = await evaluateVocabulary(
+ *   "The mitochondria is the powerhouse of the cell.",
+ *   "3",
+ *   {
+ *     googleApiKey: process.env.GOOGLE_API_KEY,
+ *     openaiApiKey: process.env.OPENAI_API_KEY
+ *   }
+ * );
+ * ```
+ */
+declare function evaluateVocabulary(text: string, grade: string, config: BaseEvaluatorConfig): Promise<EvaluationResult<TextComplexityLevel, VocabularyInternal>>;
+/**
+ * Sentence Structure Evaluator
+ *
+ * Evaluates sentence structure complexity of educational texts relative to grade level.
+ * Uses a 2-stage process:
+ * 1. Analyze grammatical structure (sentence types, clauses, phrases, etc.)
+ * 2. Classify complexity using features and grade-specific rubric
+ *
+ * Based on Qualitative Text Complexity rubric with 4 levels:
+ * - Slightly complex
+ * - Moderately complex
+ * - Very complex
+ * - Exceedingly complex
+ *
+ * Supported grades, per the static metadata type, are "3" through "12".
+ *
+ * @example
+ * ```typescript
+ * const evaluator = new SentenceStructureEvaluator({
+ *   openaiApiKey: process.env.OPENAI_API_KEY
+ * });
+ *
+ * const result = await evaluator.evaluate(text, "3");
+ * console.log(result.score); // "Moderately complex"
+ * console.log(result.reasoning);
+ * ```
+ */
+declare class SentenceStructureEvaluator extends BaseEvaluator {
+    static readonly metadata: {
+        id: string;
+        name: string;
+        description: string;
+        supportedGrades: readonly ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"];
+        requiresGoogleKey: boolean;
+        requiresOpenAIKey: boolean;
+    };
+    private analysisProvider;
+    private complexityProvider;
+    constructor(config: BaseEvaluatorConfig);
+    /**
+     * Evaluate sentence structure complexity for a given text and grade level
+     *
+     * @param text - The text to evaluate
+     * @param grade - The target grade level (3-12)
+     * @returns Evaluation result with complexity score and detailed analysis
+     * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
+     * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
+     */
+    evaluate(text: string, grade: string): Promise<EvaluationResult<TextComplexityLevel, SentenceStructureInternal>>;
+    /**
+     * Stage 1: Analyze sentence grammatical structure
+     *
+     * Analyzes sentence types, clauses, phrases, transitions, and other grammatical features
+     */
+    private analyzeSentenceStructure;
+    /**
+     * Stage 2: Classify sentence structure complexity
+     *
+     * Uses engineered features and grade-specific rubric to classify complexity level
+     */
+    private classifyComplexity;
+}
+/**
+ * Functional API for sentence structure evaluation
+ *
+ * @param text - The text to evaluate
+ * @param grade - The target grade level, as a string
+ * @param config - Evaluator configuration (API keys, retries, telemetry, logging)
+ * @returns The same result type as `SentenceStructureEvaluator.evaluate`
+ *
+ * @example
+ * ```typescript
+ * const result = await evaluateSentenceStructure(
+ *   "The cat sat on the mat. It was sleeping peacefully.",
+ *   "3",
+ *   {
+ *     openaiApiKey: process.env.OPENAI_API_KEY
+ *   }
+ * );
+ * ```
+ */
+declare function evaluateSentenceStructure(text: string, grade: string, config: BaseEvaluatorConfig): Promise<EvaluationResult<TextComplexityLevel, SentenceStructureInternal>>;
+/**
+ * Grade Level Appropriateness Evaluator
+ *
+ * Evaluates whether AI-generated text is suitable for a given grade band.
+ * Uses a structured 4-step analysis process:
+ * 1. Quantitative analysis (word count, Flesch-Kincaid)
+ * 2. Qualitative complexity (text structure, language, purpose, knowledge demands)
+ * 3. Background knowledge assessment
+ * 4. Synthesis and final recommendation
+ *
+ * Returns:
+ * - Target grade band (K-1, 2-3, 4-5, 6-8, 9-10, 11-CCR)
+ * - Alternative grade band (with scaffolding)
+ * - Specific scaffolding recommendations
+ *
+ * Unlike the other evaluators, `evaluate` takes no grade argument and the
+ * static metadata declares an empty `supportedGrades` tuple.
+ * `_internal` is optional on the result type, so the example reads it with `?.`.
+ *
+ * @example
+ * ```typescript
+ * const evaluator = new GradeLevelAppropriatenessEvaluator({
+ *   googleApiKey: process.env.GOOGLE_API_KEY
+ * });
+ *
+ * const result = await evaluator.evaluate(text);
+ * console.log(result.score); // "9-10"
+ * console.log(result._internal?.alternative_grade); // "6-8"
+ * console.log(result._internal?.scaffolding_needed);
+ * ```
+ */
+declare class GradeLevelAppropriatenessEvaluator extends BaseEvaluator {
+    static readonly metadata: {
+        id: string;
+        name: string;
+        description: string;
+        supportedGrades: readonly [];
+        requiresGoogleKey: boolean;
+        requiresOpenAIKey: boolean;
+    };
+    private provider;
+    constructor(config: BaseEvaluatorConfig);
+    /**
+     * Evaluate grade level appropriateness for a given text
+     *
+     * @param text - The text to evaluate
+     * @returns Evaluation result with grade recommendations and scaffolding suggestions
+     * @throws {ValidationError} If text is empty or too short/long
+     * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
+     */
+    evaluate(text: string): Promise<EvaluationResult<GradeBand, GradeLevelAppropriatenessInternal>>;
+}
+/**
+ * Functional API for grade level appropriateness evaluation.
+ *
+ * Has the same `text` parameter and return type as
+ * `GradeLevelAppropriatenessEvaluator.evaluate`, with the evaluator
+ * configuration supplied as a second argument.
+ *
+ * @param text - The text to evaluate
+ * @param config - Evaluator configuration; the example below supplies only
+ *   `googleApiKey`
+ * @returns Promise resolving to the evaluation result, whose `score` is a
+ *   `GradeBand`
+ *
+ * @example
+ * ```typescript
+ * const result = await evaluateGradeLevelAppropriateness(
+ *   "Tides are the rise and fall of sea levels...",
+ *   {
+ *     googleApiKey: process.env.GOOGLE_API_KEY
+ *   }
+ * );
+ * ```
+ */
+declare function evaluateGradeLevelAppropriateness(text: string, config: BaseEvaluatorConfig): Promise<EvaluationResult<GradeBand, GradeLevelAppropriatenessInternal>>;
+/**
+ * Result map returned by TextComplexityEvaluator.
+ * Each key holds the full evaluation result from its sub-evaluator, or an error if it failed.
+ *
+ * Each property is a union of `EvaluationResult<...>` and `{ error: Error }`.
+ * Narrow it before reading `score` or `reasoning`, e.g. with
+ * `if (!('error' in result.vocabulary)) { ... }`. The two keys are typed
+ * independently, so each one needs its own check.
+ */
+interface TextComplexityResult {
+    vocabulary: EvaluationResult<TextComplexityLevel, VocabularyInternal> | {
+        error: Error;
+    };
+    sentenceStructure: EvaluationResult<TextComplexityLevel, SentenceStructureInternal> | {
+        error: Error;
+    };
+}
+/**
+ * Text Complexity Evaluator
+ *
+ * Composite evaluator that analyzes both vocabulary and sentence structure complexity.
+ * Runs both evaluations in parallel with concurrency control to avoid rate limiting.
+ *
+ * Uses:
+ * - VocabularyEvaluator (Google Gemini 2.5 Pro + OpenAI GPT-4o)
+ * - SentenceStructureEvaluator (OpenAI GPT-4o)
+ *
+ * Each key of the returned `TextComplexityResult` is either an evaluation
+ * result or `{ error: Error }`, so each must be narrowed separately before use.
+ *
+ * @example
+ * ```typescript
+ * const evaluator = new TextComplexityEvaluator({
+ *   googleApiKey: process.env.GOOGLE_API_KEY,
+ *   openaiApiKey: process.env.OPENAI_API_KEY
+ * });
+ *
+ * const result = await evaluator.evaluate(text, "5");
+ * if (!('error' in result.vocabulary)) {
+ *   console.log(result.vocabulary.score); // "Moderately complex"
+ * }
+ * if ('error' in result.sentenceStructure) {
+ *   console.error(result.sentenceStructure.error.message);
+ * }
+ * ```
+ */
+declare class TextComplexityEvaluator extends BaseEvaluator {
+    static readonly metadata: {
+        id: string;
+        name: string;
+        description: string;
+        supportedGrades: readonly ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"];
+        requiresGoogleKey: boolean;
+        requiresOpenAIKey: boolean;
+    };
+    private vocabularyEvaluator;
+    private sentenceStructureEvaluator;
+    private limit;
+    constructor(config: BaseEvaluatorConfig);
+    /**
+     * Evaluate text complexity for a given text and grade level.
+     *
+     * Runs vocabulary and sentence structure evaluations in parallel with concurrency control.
+     * If both sub-evaluators fail, throws an error. Otherwise returns a result map where
+     * failed sub-evaluators are represented as `{ error: Error }`.
+     *
+     * A resolved promise therefore does not mean both evaluations succeeded:
+     * a single failure is reported inside the result map rather than by
+     * rejection, so callers should check both keys.
+     *
+     * @param text - The text to evaluate
+     * @param grade - The target grade level, passed as a string ("3" through "12",
+     *   matching `metadata.supportedGrades`)
+     * @returns Map of sub-evaluator results, keyed by `vocabulary` and `sentenceStructure`
+     * @throws {ValidationError} If text is empty or grade is invalid
+     * @throws {Error} If all sub-evaluators fail
+     */
+    evaluate(text: string, grade: string): Promise<TextComplexityResult>;
+    /**
+     * Run a sub-evaluator with error handling.
+     * Returns the evaluation result or `{ error: Error }` if the evaluator throws.
+     *
+     * Private member: this declaration file emits only its name, so the
+     * parameter and return types are not visible to consumers.
+     */
+    private runSubEvaluator;
+}
+/**
+ * Functional API for text complexity evaluation.
+ *
+ * Has the same `text`/`grade` parameters and return type as
+ * `TextComplexityEvaluator.evaluate`, with the evaluator configuration
+ * supplied as a third argument.
+ *
+ * @param text - The text to evaluate
+ * @param grade - The target grade level, passed as a string (e.g. "5")
+ * @param config - Evaluator configuration; the example below supplies both
+ *   `googleApiKey` and `openaiApiKey`
+ * @returns Promise resolving to a `TextComplexityResult`; each key is either an
+ *   evaluation result or `{ error: Error }` and must be narrowed before use
+ *
+ * @example
+ * ```typescript
+ * const result = await evaluateTextComplexity(
+ *   "The cat sat on the mat.",
+ *   "5",
+ *   {
+ *     googleApiKey: process.env.GOOGLE_API_KEY,
+ *     openaiApiKey: process.env.OPENAI_API_KEY
+ *   }
+ * );
+ * ```
+ */
+declare function evaluateTextComplexity(text: string, grade: string, config: BaseEvaluatorConfig): Promise<TextComplexityResult>;
+/**
+ * Calculate Flesch-Kincaid Grade Level
+ * Equivalent to Python's textstat.flesch_kincaid_grade()
+ *
+ * Synchronous: returns a plain number rather than a Promise.
+ *
+ * @param text - The text to score
+ * @returns The Flesch-Kincaid grade level as a number
+ *   (NOTE(review): rounding and empty-input behavior are not visible in this
+ *   declaration file - confirm against the implementation)
+ */
+declare function calculateFleschKincaidGrade(text: string): number;
+/**
+ * Readability metrics for a text, as returned by `calculateReadabilityMetrics`.
+ *
+ * All fields are numbers. Going by the field names, the first four are raw
+ * counts (sentences, words, characters, syllables), the next two are averages
+ * derived from them, and the last is the Flesch-Kincaid grade. The exact
+ * counting rules (e.g. how sentences and syllables are detected) are not
+ * visible in this declaration file.
+ */
+interface ReadabilityMetrics {
+    sentenceCount: number;
+    wordCount: number;
+    characterCount: number;
+    syllableCount: number;
+    avgWordsPerSentence: number;
+    avgSyllablesPerWord: number;
+    fleschKincaidGrade: number;
+}
+declare function calculateReadabilityMetrics(text: string): ReadabilityMetrics; // Synchronous; returns the full ReadabilityMetrics record (counts, averages, Flesch-Kincaid grade) for `text`.
+/**
+ * Add engineered features to sentence analysis output
+ * Ported from Python add_engineered_features function
+ *
+ * @param analysis - Sentence analysis output, typed as `SentenceAnalysis`
+ * @returns The feature set as `SentenceFeatures`
+ *   (NOTE(review): whether `analysis` is mutated or copied is not visible in
+ *   this declaration file - confirm against the implementation)
+ */
+declare function addEngineeredFeatures(analysis: SentenceAnalysis): SentenceFeatures;
+/**
+ * Convert sentence features to JSON string for LLM prompt
+ * Ported from Python row_to_features_json
+ *
+ * @param features - The sentence features to serialize
+ * @param decimals - Optional; presumably the number of decimal places kept for
+ *   numeric values (default not visible here - confirm against the implementation)
+ * @param castToInt - Optional; presumably controls whether whole-number values
+ *   are emitted as integers (default not visible here - confirm against the
+ *   implementation)
+ * @returns The features serialized as a JSON string
+ */
+declare function featuresToJSON(features: SentenceFeatures, decimals?: number, castToInt?: boolean): string;
+export { APIError, AuthenticationError, type BaseEvaluatorConfig, type ComplexityClassification, ComplexityClassificationSchema, ConfigurationError, type EvaluationError, type EvaluationMetadata, type EvaluationResult, EvaluatorError, type EvaluatorMetadata, GradeBand, GradeLevelAppropriatenessEvaluator, type GradeLevelAppropriatenessInternal, GradeLevelAppropriatenessSchema, type LLMProvider, type LLMRequest, type LLMResponse, type LogContext, LogLevel, type Logger, type Message, NetworkError, type ProviderConfig, RateLimitError, type ReadabilityMetrics, type SentenceAnalysis, SentenceAnalysisSchema, type SentenceFeatures, SentenceStructureEvaluator, type SentenceStructureInternal, type TelemetryOptions, TextComplexityEvaluator, TextComplexityLevel, type TextComplexityResult, type TextGenerationResponse, TimeoutError, ValidationError, VocabularyEvaluator, type VocabularyInternal, addEngineeredFeatures, calculateFleschKincaidGrade, calculateReadabilityMetrics, evaluateGradeLevelAppropriateness, evaluateSentenceStructure, evaluateTextComplexity, evaluateVocabulary, featuresToJSON };