@learning-commons/evaluators 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1142 @@
1
+ import { z } from 'zod';
2
+
3
+ /**
4
+ * Shared four-level complexity scale ("Slightly complex" through "Exceedingly complex") used across all text complexity evaluators
5
+ * (Vocabulary, Sentence Structure, and any future sub-evaluators); declared as a Zod enum plus its inferred string-literal union type
6
+ */
7
+ declare const TextComplexityLevel: z.ZodEnum<["Slightly complex", "Moderately complex", "Very complex", "Exceedingly complex"]>;
8
+ type TextComplexityLevel = z.infer<typeof TextComplexityLevel>;
9
+ /**
10
+ * Metadata attached to all evaluation results (the `metadata` field of EvaluationResult): a `model` string and `processingTimeMs`
11
+ */
12
+ interface EvaluationMetadata {
13
+ model: string;
14
+ processingTimeMs: number;
15
+ }
16
+ /**
17
+ * Base evaluation result structure, generic over the score type (`TScore`, default `string`) and the optional `_internal` payload type (`TInternal`, default `unknown`)
18
+ */
19
+ interface EvaluationResult<TScore = string, TInternal = unknown> {
20
+ score: TScore;
21
+ reasoning: string;
22
+ metadata: EvaluationMetadata;
23
+ _internal?: TInternal;
24
+ }
25
+ /**
26
+ * Error type for failed evaluations: an `error` string plus the `input` (`text` and optional `grade`) it relates to
27
+ */
28
+ interface EvaluationError {
29
+ error: string;
30
+ input: {
31
+ text: string;
32
+ grade?: string;
33
+ };
34
+ }
35
+
36
+ /**
37
+ * Valid grade bands for grade level appropriateness evaluation ("K-1" through "11-CCR"), declared as a Zod enum plus its inferred union type
38
+ */
39
+ declare const GradeBand: z.ZodEnum<["K-1", "2-3", "4-5", "6-8", "9-10", "11-CCR"]>;
40
+ type GradeBand = z.infer<typeof GradeBand>;
41
+ /**
42
+ * Output schema for Grade Level Appropriateness evaluation: `reasoning`, `grade`, `alternative_grade` (both grade bands) and `scaffolding_needed`
43
+ * Matches Python OutputRanges model; the object uses Zod's "strip" unknown-keys mode, and GradeLevelAppropriatenessInternal is its inferred type
44
+ */
45
+ declare const GradeLevelAppropriatenessSchema: z.ZodObject<{
46
+ reasoning: z.ZodString;
47
+ grade: z.ZodEnum<["K-1", "2-3", "4-5", "6-8", "9-10", "11-CCR"]>;
48
+ alternative_grade: z.ZodEnum<["K-1", "2-3", "4-5", "6-8", "9-10", "11-CCR"]>;
49
+ scaffolding_needed: z.ZodString;
50
+ }, "strip", z.ZodTypeAny, {
51
+ reasoning: string;
52
+ grade: "K-1" | "2-3" | "4-5" | "6-8" | "9-10" | "11-CCR";
53
+ alternative_grade: "K-1" | "2-3" | "4-5" | "6-8" | "9-10" | "11-CCR";
54
+ scaffolding_needed: string;
55
+ }, {
56
+ reasoning: string;
57
+ grade: "K-1" | "2-3" | "4-5" | "6-8" | "9-10" | "11-CCR";
58
+ alternative_grade: "K-1" | "2-3" | "4-5" | "6-8" | "9-10" | "11-CCR";
59
+ scaffolding_needed: string;
60
+ }>;
61
+ type GradeLevelAppropriatenessInternal = z.infer<typeof GradeLevelAppropriatenessSchema>;
62
+
63
+ /**
64
+ * Custom error types for the Evaluators SDK
65
+ *
66
+ * This module provides a hierarchy of error types to help users
67
+ * distinguish between different error scenarios and implement
68
+ * appropriate error handling strategies.
69
+ */
70
+ /**
71
+ * Base error class for all evaluator errors; extends the built-in Error and carries an optional string `code`
72
+ */
73
+ declare class EvaluatorError extends Error {
74
+ readonly code?: string | undefined;
75
+ constructor(message: string, code?: string | undefined);
76
+ }
77
+ /**
78
+ * Configuration error (subclass of EvaluatorError) - thrown when the evaluator is misconfigured
79
+ * These are developer errors (e.g. missing API keys) that should NOT be retried
80
+ *
81
+ * @example
82
+ * ```typescript
83
+ * try {
84
+ * const evaluator = new VocabularyEvaluator({ googleApiKey: '' });
85
+ * } catch (error) {
86
+ * if (error instanceof ConfigurationError) {
87
+ * console.error('Check your evaluator config:', error.message);
88
+ * }
89
+ * }
90
+ * ```
91
+ */
92
+ declare class ConfigurationError extends EvaluatorError {
93
+ constructor(message: string);
94
+ }
95
+ /**
96
+ * Validation error (subclass of EvaluatorError) - thrown when input validation fails
97
+ * These are client-side errors that should NOT be retried
98
+ *
99
+ * @example
100
+ * ```typescript
101
+ * try {
102
+ * await evaluator.evaluate('', '5');
103
+ * } catch (error) {
104
+ * if (error instanceof ValidationError) {
105
+ * // Show user-friendly error message
106
+ * console.error('Invalid input:', error.message);
107
+ * }
108
+ * }
109
+ * ```
110
+ */
111
+ declare class ValidationError extends EvaluatorError {
112
+ constructor(message: string);
113
+ }
114
+ /**
115
+ * Base API error - thrown when LLM API calls fail
116
+ * Contains additional context about the API error: an optional `statusCode` and a `retryable` flag (plus the inherited optional `code`)
117
+ */
118
+ declare class APIError extends EvaluatorError {
119
+ readonly statusCode?: number | undefined;
120
+ readonly retryable: boolean;
121
+ constructor(message: string, statusCode?: number | undefined, retryable?: boolean, code?: string);
122
+ }
123
+ /**
124
+ * Authentication error - thrown when API keys are invalid or missing
125
+ * HTTP 401 or 403 responses
126
+ * Should NOT be retried (required keys missing from the evaluator config are documented as ConfigurationError instead)
127
+ *
128
+ * @example
129
+ * ```typescript
130
+ * try {
131
+ * await evaluator.evaluate(text, grade);
132
+ * } catch (error) {
133
+ * if (error instanceof AuthenticationError) {
134
+ * // Prompt user to check API keys
135
+ * console.error('Invalid API keys. Please check your credentials.');
136
+ * }
137
+ * }
138
+ * ```
139
+ */
140
+ declare class AuthenticationError extends APIError {
141
+ constructor(message: string, statusCode?: number);
142
+ }
143
+ /**
144
+ * Rate limit error - thrown when API rate limits are exceeded
145
+ * HTTP 429 responses
146
+ * Should be retried with exponential backoff; exposes an optional `retryAfter` number
147
+ *
148
+ * @example
149
+ * ```typescript
150
+ * try {
151
+ * await evaluator.evaluate(text, grade);
152
+ * } catch (error) {
153
+ * if (error instanceof RateLimitError) {
154
+ * // Wait and retry (NOTE(review): the unit of retryAfter is not stated in these typings - confirm before passing it to sleep)
155
+ * await sleep(error.retryAfter || 5000);
156
+ * // retry...
157
+ * }
158
+ * }
159
+ * ```
160
+ */
161
+ declare class RateLimitError extends APIError {
162
+ readonly retryAfter?: number | undefined;
163
+ constructor(message: string, retryAfter?: number | undefined);
164
+ }
165
+ /**
166
+ * Network error - thrown when network requests fail
167
+ * Connection timeouts, DNS failures, etc.
168
+ * May be retryable depending on the scenario (see the optional `retryable` constructor argument)
169
+ *
170
+ * @example
171
+ * ```typescript
172
+ * try {
173
+ * await evaluator.evaluate(text, grade);
174
+ * } catch (error) {
175
+ * if (error instanceof NetworkError) {
176
+ * // Check network connection and retry
177
+ * console.error('Network error:', error.message);
178
+ * }
179
+ * }
180
+ * ```
181
+ */
182
+ declare class NetworkError extends APIError {
183
+ constructor(message: string, retryable?: boolean);
184
+ }
185
+ /**
186
+ * Timeout error (subclass of APIError) - thrown when requests exceed timeout limits; the constructor message is optional
187
+ * Should be retried with caution
188
+ *
189
+ * @example
190
+ * ```typescript
191
+ * try {
192
+ * await evaluator.evaluate(text, grade);
193
+ * } catch (error) {
194
+ * if (error instanceof TimeoutError) {
195
+ * // Retry with longer timeout or smaller text
196
+ * console.error('Request timed out');
197
+ * }
198
+ * }
199
+ * ```
200
+ */
201
+ declare class TimeoutError extends APIError {
202
+ constructor(message?: string);
203
+ }
204
+
205
+ /**
206
+ * Logging interface for the Evaluators SDK
207
+ *
208
+ * Provides structured logging with verbosity levels.
209
+ * Users can inject custom loggers or use the default console logger.
210
+ */
211
+ /**
212
+ * Log levels, ordered from most verbose (DEBUG = 0) to least verbose (SILENT = 4)
213
+ */
214
+ declare enum LogLevel {
215
+ /** Debug messages - very verbose, for development */
216
+ DEBUG = 0,
217
+ /** Informational messages - normal operations */
218
+ INFO = 1,
219
+ /** Warning messages - potentially problematic situations */
220
+ WARN = 2,
221
+ /** Error messages - errors that need attention */
222
+ ERROR = 3,
223
+ /** Silent - no logging */
224
+ SILENT = 4
225
+ }
226
+ /**
227
+ * Context object for structured logging; every named field is optional and extra keys are allowed
228
+ */
229
+ interface LogContext {
230
+ /** Evaluator type (vocabulary, sentence-structure, etc.) */
231
+ evaluator?: string;
232
+ /** Current operation or stage */
233
+ operation?: string;
234
+ /** Error object if applicable */
235
+ error?: Error;
236
+ /** Additional metadata: any other string key, typed as unknown */
237
+ [key: string]: unknown;
238
+ }
239
+ /**
240
+ * Logger interface
241
+ *
242
+ * Implement this interface (debug, info, warn and error methods) to provide custom logging behavior.
243
+ *
244
+ * @example
245
+ * ```typescript
246
+ * const customLogger: Logger = {
247
+ * debug: (msg, ctx) => myLogger.debug(msg, ctx),
248
+ * info: (msg, ctx) => myLogger.info(msg, ctx),
249
+ * warn: (msg, ctx) => myLogger.warn(msg, ctx),
250
+ * error: (msg, ctx) => myLogger.error(msg, ctx),
251
+ * };
252
+ *
253
+ * const evaluator = new VocabularyEvaluator({
254
+ * googleApiKey: '...',
255
+ * openaiApiKey: '...',
256
+ * logger: customLogger,
257
+ * logLevel: LogLevel.INFO, // NOTE(review): BaseEvaluatorConfig documents logLevel as only used when no custom logger is provided
258
+ * });
259
+ * ```
260
+ */
261
+ interface Logger {
262
+ /**
263
+ * Log debug message
264
+ * Used for detailed debugging information
265
+ */
266
+ debug(message: string, context?: LogContext): void;
267
+ /**
268
+ * Log informational message
269
+ * Used for normal operations
270
+ */
271
+ info(message: string, context?: LogContext): void;
272
+ /**
273
+ * Log warning message
274
+ * Used for potentially problematic situations
275
+ */
276
+ warn(message: string, context?: LogContext): void;
277
+ /**
278
+ * Log error message
279
+ * Used for errors that need attention
280
+ */
281
+ error(message: string, context?: LogContext): void;
282
+ }
283
+
284
+ /**
285
+ * Message format for LLM conversations: a `role` ('system', 'user' or 'assistant') and string `content`
286
+ */
287
+ interface Message {
288
+ role: 'system' | 'user' | 'assistant';
289
+ content: string;
290
+ }
291
+ /**
292
+ * Request configuration for structured LLM generation: `messages` plus a Zod `schema` for the output type `T`; `temperature`, `maxTokens` and `model` are optional
293
+ */
294
+ interface LLMRequest<T> {
295
+ messages: Message[];
296
+ schema: z.ZodSchema<T>;
297
+ temperature?: number;
298
+ maxTokens?: number;
299
+ model?: string;
300
+ }
301
+ /**
302
+ * Response from LLM with usage metadata: typed `data`, the `model` string, input/output token counts and `latencyMs`
303
+ */
304
+ interface LLMResponse<T> {
305
+ data: T;
306
+ model: string;
307
+ usage: {
308
+ inputTokens: number;
309
+ outputTokens: number;
310
+ };
311
+ latencyMs: number;
312
+ }
313
+ /**
314
+ * Response from plain text generation: the generated `text`, token `usage` and `latencyMs` (no `model` field, unlike LLMResponse)
315
+ */
316
+ interface TextGenerationResponse {
317
+ text: string;
318
+ usage: {
319
+ inputTokens: number;
320
+ outputTokens: number;
321
+ };
322
+ latencyMs: number;
323
+ }
324
+ /**
325
+ * Base interface for LLM provider implementations (one structured and one plain-text generation method, both returning promises)
326
+ */
327
+ interface LLMProvider {
328
+ /**
329
+ * Generate structured output from LLM using the Zod schema carried in the request
330
+ */
331
+ generateStructured<T>(request: LLMRequest<T>): Promise<LLMResponse<T>>;
332
+ /**
333
+ * Generate plain text from LLM for the given messages; `temperature` is optional
334
+ */
335
+ generateText(messages: Message[], temperature?: number): Promise<TextGenerationResponse>;
336
+ }
337
+ /**
338
+ * Configuration for LLM provider; only `type` ('openai', 'anthropic', 'google' or 'custom') is required, every other field is optional
339
+ */
340
+ interface ProviderConfig {
341
+ type: 'openai' | 'anthropic' | 'google' | 'custom';
342
+ apiKey?: string;
343
+ model?: string;
344
+ temperature?: number;
345
+ baseURL?: string;
346
+ customProvider?: LLMProvider;
347
+ maxRetries?: number;
348
+ }
349
+
350
+ /**
351
+ * Stage 1: Detailed sentence analysis output (36 metric fields plus a `reasoning` string)
352
+ * Ported from Python SentenceAnalysesEvaluatorOutput; SentenceAnalysis is the inferred type of this schema
353
+ */
354
+ declare const SentenceAnalysisSchema: z.ZodObject<{
355
+ reasoning: z.ZodString;
356
+ num_sentences: z.ZodNumber;
357
+ num_words: z.ZodNumber;
358
+ flesch_kincaid_grade: z.ZodNumber;
359
+ num_simple_sentences: z.ZodNumber;
360
+ num_compound_sentences: z.ZodNumber;
361
+ num_complex_sentences: z.ZodNumber;
362
+ num_compound_complex_sentences: z.ZodNumber;
363
+ num_other_sentences: z.ZodNumber;
364
+ num_independent_clauses: z.ZodNumber;
365
+ num_subordinate_clauses: z.ZodNumber;
366
+ num_total_clauses: z.ZodNumber;
367
+ num_sentences_with_subordinate: z.ZodNumber;
368
+ num_sentences_with_multiple_subordinates: z.ZodNumber;
369
+ num_sentences_with_embedded_clauses: z.ZodNumber;
370
+ num_prepositional_phrases: z.ZodNumber;
371
+ num_participle_phrases: z.ZodNumber;
372
+ num_appositive_phrases: z.ZodNumber;
373
+ num_simple_transitions: z.ZodNumber;
374
+ num_sophisticated_transitions: z.ZodNumber;
375
+ words_in_simple_sentences: z.ZodNumber;
376
+ words_in_compound_sentences: z.ZodNumber;
377
+ words_in_complex_sentences: z.ZodNumber;
378
+ words_in_compound_complex_sentences: z.ZodNumber;
379
+ words_in_other_sentences: z.ZodNumber;
380
+ sentence_word_counts: z.ZodArray<z.ZodNumber, "many">;
381
+ num_one_concept_sentences: z.ZodNumber;
382
+ num_multi_concept_sentences: z.ZodNumber;
383
+ num_cleft_sentences: z.ZodNumber;
384
+ max_clauses_in_any_sentence: z.ZodNumber;
385
+ num_compound: z.ZodNumber;
386
+ num_basic_complex: z.ZodNumber;
387
+ num_advanced_complex: z.ZodNumber;
388
+ percentage_simple: z.ZodNumber;
389
+ percentage_compound: z.ZodNumber;
390
+ percentage_basic_complex: z.ZodNumber;
391
+ percentage_advanced_complex: z.ZodNumber;
392
+ }, "strip", z.ZodTypeAny, {
393
+ reasoning: string;
394
+ num_sentences: number;
395
+ num_words: number;
396
+ flesch_kincaid_grade: number;
397
+ num_simple_sentences: number;
398
+ num_compound_sentences: number;
399
+ num_complex_sentences: number;
400
+ num_compound_complex_sentences: number;
401
+ num_other_sentences: number;
402
+ num_independent_clauses: number;
403
+ num_subordinate_clauses: number;
404
+ num_total_clauses: number;
405
+ num_sentences_with_subordinate: number;
406
+ num_sentences_with_multiple_subordinates: number;
407
+ num_sentences_with_embedded_clauses: number;
408
+ num_prepositional_phrases: number;
409
+ num_participle_phrases: number;
410
+ num_appositive_phrases: number;
411
+ num_simple_transitions: number;
412
+ num_sophisticated_transitions: number;
413
+ words_in_simple_sentences: number;
414
+ words_in_compound_sentences: number;
415
+ words_in_complex_sentences: number;
416
+ words_in_compound_complex_sentences: number;
417
+ words_in_other_sentences: number;
418
+ sentence_word_counts: number[];
419
+ num_one_concept_sentences: number;
420
+ num_multi_concept_sentences: number;
421
+ num_cleft_sentences: number;
422
+ max_clauses_in_any_sentence: number;
423
+ num_compound: number;
424
+ num_basic_complex: number;
425
+ num_advanced_complex: number;
426
+ percentage_simple: number;
427
+ percentage_compound: number;
428
+ percentage_basic_complex: number;
429
+ percentage_advanced_complex: number;
430
+ }, {
431
+ reasoning: string;
432
+ num_sentences: number;
433
+ num_words: number;
434
+ flesch_kincaid_grade: number;
435
+ num_simple_sentences: number;
436
+ num_compound_sentences: number;
437
+ num_complex_sentences: number;
438
+ num_compound_complex_sentences: number;
439
+ num_other_sentences: number;
440
+ num_independent_clauses: number;
441
+ num_subordinate_clauses: number;
442
+ num_total_clauses: number;
443
+ num_sentences_with_subordinate: number;
444
+ num_sentences_with_multiple_subordinates: number;
445
+ num_sentences_with_embedded_clauses: number;
446
+ num_prepositional_phrases: number;
447
+ num_participle_phrases: number;
448
+ num_appositive_phrases: number;
449
+ num_simple_transitions: number;
450
+ num_sophisticated_transitions: number;
451
+ words_in_simple_sentences: number;
452
+ words_in_compound_sentences: number;
453
+ words_in_complex_sentences: number;
454
+ words_in_compound_complex_sentences: number;
455
+ words_in_other_sentences: number;
456
+ sentence_word_counts: number[];
457
+ num_one_concept_sentences: number;
458
+ num_multi_concept_sentences: number;
459
+ num_cleft_sentences: number;
460
+ max_clauses_in_any_sentence: number;
461
+ num_compound: number;
462
+ num_basic_complex: number;
463
+ num_advanced_complex: number;
464
+ percentage_simple: number;
465
+ percentage_compound: number;
466
+ percentage_basic_complex: number;
467
+ percentage_advanced_complex: number;
468
+ }>;
469
+ type SentenceAnalysis = z.infer<typeof SentenceAnalysisSchema>;
470
+ /**
471
+ * Stage 2: Final complexity classification (`reasoning` plus an `answer` that is one of the four text complexity levels)
472
+ * Ported from Python ComplexityClassificationOutput
473
+ */
474
+ declare const ComplexityClassificationSchema: z.ZodObject<{
475
+ reasoning: z.ZodString;
476
+ answer: z.ZodEnum<["Slightly complex", "Moderately complex", "Very complex", "Exceedingly complex"]>;
477
+ }, "strip", z.ZodTypeAny, {
478
+ reasoning: string;
479
+ answer: "Slightly complex" | "Moderately complex" | "Very complex" | "Exceedingly complex";
480
+ }, {
481
+ reasoning: string;
482
+ answer: "Slightly complex" | "Moderately complex" | "Very complex" | "Exceedingly complex";
483
+ }>;
484
+ type ComplexityClassification = z.infer<typeof ComplexityClassificationSchema>;
485
+ /**
486
+ * Internal data structure for sentence structure evaluation: the sentence analysis, the engineered features and the complexity classification
487
+ */
488
+ interface SentenceStructureInternal {
489
+ sentenceAnalysis: SentenceAnalysis;
490
+ features: SentenceFeatures;
491
+ complexity: ComplexityClassification;
492
+ }
493
+ /**
494
+ * Engineered features computed from sentence analysis; extends SentenceAnalysis, so every analysis field is also present
495
+ * These are calculated in TypeScript, not requested from LLM
496
+ */
497
+ interface SentenceFeatures extends SentenceAnalysis {
498
+ avg_words_per_sentence: number;
499
+ sentence_length_variation: number;
500
+ percent_short_sentences: number;
501
+ percent_medium_sentences: number;
502
+ percent_long_sentences: number;
503
+ percent_very_long_sentences: number;
504
+ percent_simple_sentences: number;
505
+ percent_compound_sentences: number;
506
+ percent_complex_sentences: number;
507
+ percent_compound_complex_sentences: number;
508
+ percent_other_sentences: number;
509
+ percent_words_in_simple_sentences: number;
510
+ percent_words_in_complex_sentences: number;
511
+ percent_words_in_compound_sentences: number;
512
+ percent_words_in_compound_complex_sentences: number;
513
+ percent_words_in_other_sentences: number;
514
+ avg_subordinates_per_sentence: number;
515
+ avg_clauses_per_sentence: number;
516
+ percent_sentences_with_subordinate: number;
517
+ percent_sentences_with_multiple_subordinates: number;
518
+ percent_sentences_with_embedded_clauses: number;
519
+ prep_phrase_density: number;
520
+ participle_phrase_density: number;
521
+ appositive_phrase_density: number;
522
+ avg_transitions_per_sentence: number;
523
+ percent_sophisticated_transitions: number;
524
+ percent_sentences_w_one_concept: number;
525
+ percent_sentences_w_multi_concept: number;
526
+ percent_cleft_sentences: number;
527
+ }
528
+
529
+ /**
530
+ * Vocabulary evaluation output schema: four string word-category fields, a `complexity_score` (one of the four text complexity levels) and `reasoning`
531
+ */
532
+ declare const VocabularyComplexitySchema: z.ZodObject<{
533
+ tier_2_words: z.ZodString;
534
+ tier_3_words: z.ZodString;
535
+ archaic_words: z.ZodString;
536
+ other_complex_words: z.ZodString;
537
+ complexity_score: z.ZodEnum<["Slightly complex", "Moderately complex", "Very complex", "Exceedingly complex"]>;
538
+ reasoning: z.ZodString;
539
+ }, "strip", z.ZodTypeAny, {
540
+ reasoning: string;
541
+ tier_2_words: string;
542
+ tier_3_words: string;
543
+ archaic_words: string;
544
+ other_complex_words: string;
545
+ complexity_score: "Slightly complex" | "Moderately complex" | "Very complex" | "Exceedingly complex";
546
+ }, {
547
+ reasoning: string;
548
+ tier_2_words: string;
549
+ tier_3_words: string;
550
+ archaic_words: string;
551
+ other_complex_words: string;
552
+ complexity_score: "Slightly complex" | "Moderately complex" | "Very complex" | "Exceedingly complex";
553
+ }>;
554
+ type VocabularyInternal = z.infer<typeof VocabularyComplexitySchema>;
555
+
556
+ /**
557
+ * Evaluation status ('success' or 'error'), used as the `status` field of TelemetryEvent
558
+ */
559
+ type EvaluationStatus = 'success' | 'error';
560
+ /**
561
+ * Token usage metrics from LLM providers (snake_case field names, unlike the camelCase `usage` object on LLMResponse)
562
+ */
563
+ interface TokenUsage {
564
+ input_tokens: number;
565
+ output_tokens: number;
566
+ }
567
+ /**
568
+ * Per-stage details for multi-stage evaluations (carried in TelemetryMetadata.stage_details)
569
+ */
570
+ interface StageDetail {
571
+ /** Stage name (e.g., "background_knowledge", "complexity_evaluation") */
572
+ stage: string;
573
+ /** Provider used for this stage (e.g., "openai:gpt-4o") */
574
+ provider: string;
575
+ /** Total latency including all retries (ms) */
576
+ latency_ms: number;
577
+ /** Token usage aggregated across all attempts (optional) */
578
+ token_usage?: TokenUsage;
579
+ /**
580
+ * Whether schema validation failed (indicates prompt needs clearer instructions)
581
+ *
582
+ * TODO: Not currently tracked. Vercel AI SDK abstracts validation away.
583
+ * To implement: Add custom retry wrapper that catches validation errors.
584
+ */
585
+ schema_validation_failed?: boolean;
586
+ }
587
+ /**
588
+ * Extensible metadata for telemetry events; currently declares only the optional `stage_details` list
589
+ */
590
+ interface TelemetryMetadata {
591
+ /** Detailed breakdown by stage (for multi-stage evaluations) */
592
+ stage_details?: StageDetail[];
593
+ }
594
+ /**
595
+ * Telemetry event payload; `input_text` is optional (presumably populated only when TelemetryOptions.recordInputs is enabled - confirm)
596
+ */
597
+ interface TelemetryEvent {
598
+ timestamp: string;
599
+ sdk_version: string;
600
+ evaluator_type: string;
601
+ grade?: string;
602
+ status: EvaluationStatus;
603
+ error_code?: string;
604
+ latency_ms: number;
605
+ text_length_chars: number;
606
+ provider: string;
607
+ token_usage?: TokenUsage;
608
+ metadata?: TelemetryMetadata;
609
+ input_text?: string;
610
+ }
611
+ /**
612
+ * Configuration for telemetry client (the argument of the TelemetryClient constructor)
613
+ */
614
+ interface TelemetryConfig {
615
+ /** Analytics service endpoint URL */
616
+ endpoint: string;
617
+ /** Learning Commons partner key (optional, sent as X-API-Key header) */
618
+ partnerKey?: string;
619
+ /** Client ID for anonymous tracking (persistent UUID from ~/.config/learning-commons/config.json) */
620
+ clientId: string;
621
+ /** Enable telemetry (required at this level; the `true` default is documented on TelemetryOptions.enabled) */
622
+ enabled: boolean;
623
+ /** Logger instance (respects the SDK's configured log level and custom logger) */
624
+ logger: Logger;
625
+ }
626
+
627
+ /**
628
+ * Telemetry client for sending analytics events, constructed from a TelemetryConfig
629
+ *
630
+ * Fire-and-forget implementation that never blocks SDK operations.
631
+ * Errors are logged but don't fail evaluations.
632
+ */
633
+ declare class TelemetryClient {
634
+ private config;
635
+ private logger;
636
+ constructor(config: TelemetryConfig);
637
+ /**
638
+ * Send telemetry event to analytics service
639
+ *
640
+ * Fire-and-forget: Errors are logged but don't throw. Returns a promise with no value.
641
+ */
642
+ send(event: TelemetryEvent): Promise<void>;
643
+ }
644
+
645
+ /**
646
+ * Granular telemetry configuration options (the object form of BaseEvaluatorConfig.telemetry); both fields are optional
647
+ */
648
+ interface TelemetryOptions {
649
+ /** Enable telemetry (default: true) */
650
+ enabled?: boolean;
651
+ /** Record input text in telemetry (default: false) */
652
+ recordInputs?: boolean;
653
+ }
654
+ /**
655
+ * Base configuration for all evaluators; every field is optional at the type level
656
+ */
657
+ interface BaseEvaluatorConfig {
658
+ /** Google API key (for evaluators using Gemini) */
659
+ googleApiKey?: string;
660
+ /** OpenAI API key (for evaluators using GPT) */
661
+ openaiApiKey?: string;
662
+ /** Learning Commons partner key for authenticated telemetry (optional) */
663
+ partnerKey?: string;
664
+ /**
665
+ * Maximum number of retries for failed API calls (default: 2)
666
+ * Set to 0 to disable retries.
667
+ *
668
+ * Note: With maxRetries=2, a failed call will be attempted up to 3 times total
669
+ * (1 initial attempt + 2 retries)
670
+ */
671
+ maxRetries?: number;
672
+ /**
673
+ * Telemetry configuration (default: enabled; recordInputs stays false unless set, per TelemetryOptions)
674
+ *
675
+ * Can be:
676
+ * - `true`: Enable with defaults (recordInputs: false)
677
+ * - `false`: Disable completely
678
+ * - `TelemetryOptions`: Granular control
679
+ */
680
+ telemetry?: boolean | TelemetryOptions;
681
+ /**
682
+ * Custom logger implementation (optional)
683
+ * If not provided, uses console logger with specified logLevel
684
+ */
685
+ logger?: Logger;
686
+ /**
687
+ * Log level for default console logger (default: WARN)
688
+ * Only used if custom logger is not provided
689
+ *
690
+ * - DEBUG: Very verbose, shows all operations
691
+ * - INFO: Normal operations
692
+ * - WARN: Warnings only (default)
693
+ * - ERROR: Errors only
694
+ * - SILENT: No logging
695
+ */
696
+ logLevel?: LogLevel;
697
+ }
698
+ /**
699
+ * Evaluator metadata interface (all fields readonly)
700
+ * Each evaluator must provide this metadata as a static `metadata` property (see BaseEvaluator.metadata)
701
+ */
702
+ interface EvaluatorMetadata {
703
+ /** Unique identifier for the evaluator (e.g., 'vocabulary', 'sentence-structure') */
704
+ readonly id: string;
705
+ /** Human-readable name (e.g., 'Vocabulary', 'Sentence Structure') */
706
+ readonly name: string;
707
+ /** Brief description of what the evaluator does */
708
+ readonly description: string;
709
+ /** Supported grade levels (e.g., ['3', '4', '5', ...]) */
710
+ readonly supportedGrades: readonly string[];
711
+ /** Whether this evaluator requires a Google API key */
712
+ readonly requiresGoogleKey: boolean;
713
+ /** Whether this evaluator requires an OpenAI API key */
714
+ readonly requiresOpenAIKey: boolean;
715
+ }
716
+ /**
717
+ * Abstract base class for all evaluators
718
+ *
719
+ * Provides common functionality:
720
+ * - Telemetry setup and event sending
721
+ * - Text validation
722
+ * - Grade validation (with overridable default)
723
+ * - Metadata access via the protected `metadata` getter (NOTE(review): no metadata-creating helper is declared in these typings)
724
+ *
725
+ * Concrete evaluators must implement:
726
+ * - static metadata: Provide evaluator metadata (see EvaluatorMetadata interface)
727
+ */
728
+ declare abstract class BaseEvaluator {
729
+ protected telemetryClient?: TelemetryClient;
730
+ protected logger: Logger;
731
+ protected config: Required<Pick<BaseEvaluatorConfig, 'maxRetries'>> & {
732
+ telemetry: Required<TelemetryOptions>;
733
+ };
734
+ /**
735
+ * Static metadata for the evaluator
736
+ *
737
+ * Concrete evaluators MUST define this property.
738
+ *
739
+ * @example
740
+ * ```typescript
741
+ * class MyEvaluator extends BaseEvaluator {
742
+ * static readonly metadata = {
743
+ * id: 'my-evaluator',
744
+ * name: 'My Evaluator',
745
+ * description: 'Does something useful',
746
+ * supportedGrades: ['3', '4', '5'],
747
+ * requiresGoogleKey: true,
748
+ * requiresOpenAIKey: false,
749
+ * };
750
+ * }
751
+ * ```
752
+ */
753
+ static readonly metadata: EvaluatorMetadata;
754
+ constructor(config: BaseEvaluatorConfig);
755
+ /**
756
+ * Get metadata for this evaluator instance
757
+ * @throws {ConfigurationError} If the subclass has not defined static metadata
758
+ */
759
+ protected get metadata(): EvaluatorMetadata;
760
+ /**
761
+ * Validate that required API keys are provided based on metadata
762
+ * @throws {ConfigurationError} If required API keys are missing
763
+ */
764
+ private validateApiKeys;
765
+ /**
766
+ * Normalize telemetry config (boolean or TelemetryOptions) to standard format
767
+ */
768
+ private normalizeTelemetryConfig;
769
+ /**
770
+ * Get the evaluator type identifier from metadata
771
+ * @returns The evaluator type ID (e.g., "vocabulary", "sentence-structure")
772
+ */
773
+ protected getEvaluatorType(): string;
774
+ /**
775
+ * Validate text meets requirements
776
+ * Default implementation - can be overridden by concrete evaluators
777
+ *
778
+ * @throws {ValidationError} If text is invalid
779
+ */
780
+ protected validateText(text: string): void;
781
+ /**
782
+ * Validate grade is in supported range
783
+ * Default implementation - can be overridden by concrete evaluators
784
+ *
785
+ * @param grade - Grade level to validate
786
+ * @param validGrades - Set of valid grades for this evaluator
787
+ * @throws {ValidationError} If grade is invalid
788
+ */
789
+ protected validateGrade(grade: string, validGrades: Set<string>): void;
790
+ /**
791
+ * Send telemetry event to analytics service
792
+ * Common helper for all evaluators; takes camelCase params (only status, latencyMs, textLength and provider are required)
793
+ */
794
+ protected sendTelemetry(params: {
795
+ status: 'success' | 'error';
796
+ latencyMs: number;
797
+ textLength: number;
798
+ grade?: string;
799
+ provider: string;
800
+ errorCode?: string;
801
+ tokenUsage?: TokenUsage;
802
+ metadata?: TelemetryMetadata;
803
+ inputText?: string;
804
+ }): Promise<void>;
805
+ }
806
+
807
+ /**
808
+ * Vocabulary Evaluator
809
+ *
810
+ * Evaluates vocabulary complexity of educational texts relative to grade level.
811
+ * Uses a 2-stage process:
812
+ * 1. Generate background knowledge assumption for the student's grade level
813
+ * 2. Evaluate vocabulary complexity using that background knowledge
814
+ *
815
+ * Based on Qualitative Text Complexity rubric (SAP) with 4 levels:
816
+ * - Slightly complex
817
+ * - Moderately complex
818
+ * - Very complex
819
+ * - Exceedingly complex
820
+ *
821
+ * @example
822
+ * ```typescript
823
+ * const evaluator = new VocabularyEvaluator({
824
+ * googleApiKey: process.env.GOOGLE_API_KEY,
825
+ * openaiApiKey: process.env.OPENAI_API_KEY
826
+ * });
827
+ *
828
+ * const result = await evaluator.evaluate(text, "3");
829
+ * console.log(result.score); // "Moderately complex"
830
+ * console.log(result.reasoning);
831
+ * ```
832
+ */
833
+ declare class VocabularyEvaluator extends BaseEvaluator {
834
+ static readonly metadata: {
835
+ id: string;
836
+ name: string;
837
+ description: string;
838
+ supportedGrades: readonly ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"];
839
+ requiresGoogleKey: boolean;
840
+ requiresOpenAIKey: boolean;
841
+ };
842
+ private grades34ComplexityProvider;
843
+ private otherGradesComplexityProvider;
844
+ private backgroundKnowledgeProvider;
845
+ constructor(config: BaseEvaluatorConfig);
846
+ /**
847
+ * Evaluate vocabulary complexity for a given text and grade level
848
+ *
849
+ * @param text - The text to evaluate
850
+ * @param grade - The target grade level as a string ("3" through "12", matching supportedGrades)
851
+ * @returns Evaluation result with complexity score and detailed analysis
852
+ * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
853
+ * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
854
+ */
855
+ evaluate(text: string, grade: string): Promise<EvaluationResult<TextComplexityLevel, VocabularyInternal>>;
856
+ /**
857
+ * Stage 1: Generate background knowledge assumption
858
+ *
859
+ * Estimates what topics the student at the given grade level would be familiar with
860
+ * based on Common Core curriculum progression.
861
+ */
862
+ private getBackgroundKnowledgeAssumption;
863
+ /**
864
+ * Stage 2: Evaluate vocabulary complexity
865
+ *
866
+ * Uses the Qualitative Text Complexity rubric (SAP) and background knowledge to evaluate vocabulary complexity.
867
+ * Grades 3-4 use Gemini 2.5 Pro; grades 5-12 use GPT-4.1.
868
+ */
869
+ private evaluateComplexity;
870
+ }
871
+ /**
872
+ * Functional API for vocabulary evaluation: takes the text, the grade and an evaluator config, and resolves to the same result type as VocabularyEvaluator.evaluate
873
+ *
874
+ * @example
875
+ * ```typescript
876
+ * const result = await evaluateVocabulary(
877
+ * "The mitochondria is the powerhouse of the cell.",
878
+ * "3",
879
+ * {
880
+ * googleApiKey: process.env.GOOGLE_API_KEY,
881
+ * openaiApiKey: process.env.OPENAI_API_KEY
882
+ * }
883
+ * );
884
+ * ```
885
+ */
886
+ declare function evaluateVocabulary(text: string, grade: string, config: BaseEvaluatorConfig): Promise<EvaluationResult<TextComplexityLevel, VocabularyInternal>>;
887
+
888
+ /**
889
+ * Sentence Structure Evaluator
890
+ *
891
+ * Evaluates sentence structure complexity of educational texts relative to grade level.
892
+ * Uses a 2-stage process:
893
+ * 1. Analyze grammatical structure (sentence types, clauses, phrases, etc.)
894
+ * 2. Classify complexity using features and grade-specific rubric
895
+ *
896
+ * Based on Qualitative Text Complexity rubric with 4 levels:
897
+ * - Slightly complex
898
+ * - Moderately complex
899
+ * - Very complex
900
+ * - Exceedingly complex
901
+ *
902
+ * @example
903
+ * ```typescript
904
+ * const evaluator = new SentenceStructureEvaluator({
905
+ * openaiApiKey: process.env.OPENAI_API_KEY
906
+ * });
907
+ *
908
+ * const result = await evaluator.evaluate(text, "3");
909
+ * console.log(result.score); // "Moderately complex"
910
+ * console.log(result.reasoning);
911
+ * ```
912
+ */
913
+ declare class SentenceStructureEvaluator extends BaseEvaluator {
914
+ static readonly metadata: {
915
+ id: string;
916
+ name: string;
917
+ description: string;
918
+ supportedGrades: readonly ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"];
919
+ requiresGoogleKey: boolean;
920
+ requiresOpenAIKey: boolean;
921
+ };
922
+ private analysisProvider;
923
+ private complexityProvider;
924
+ constructor(config: BaseEvaluatorConfig);
925
+ /**
926
+ * Evaluate sentence structure complexity for a given text and grade level
927
+ *
928
+ * @param text - The text to evaluate
929
+ * @param grade - The target grade level as a string ("3" through "12", matching supportedGrades)
930
+ * @returns Evaluation result with complexity score and detailed analysis
931
+ * @throws {ValidationError} If text is empty, too short/long, or grade is invalid
932
+ * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
933
+ */
934
+ evaluate(text: string, grade: string): Promise<EvaluationResult<TextComplexityLevel, SentenceStructureInternal>>;
935
+ /**
936
+ * Stage 1: Analyze sentence grammatical structure
937
+ *
938
+ * Analyzes sentence types, clauses, phrases, transitions, and other grammatical features
939
+ */
940
+ private analyzeSentenceStructure;
941
+ /**
942
+ * Stage 2: Classify sentence structure complexity
943
+ *
944
+ * Uses engineered features and grade-specific rubric to classify complexity level
945
+ */
946
+ private classifyComplexity;
947
+ }
948
+ /**
949
+ * Functional API for sentence structure evaluation: resolves to an evaluation result whose score is a TextComplexityLevel.
950
+ *
951
+ * @example
952
+ * ```typescript
953
+ * const result = await evaluateSentenceStructure(
954
+ * "The cat sat on the mat. It was sleeping peacefully.",
955
+ * "3",
956
+ * {
957
+ * openaiApiKey: process.env.OPENAI_API_KEY
958
+ * }
959
+ * );
960
+ * ```
961
+ */
962
+ declare function evaluateSentenceStructure(text: string, grade: string, config: BaseEvaluatorConfig): Promise<EvaluationResult<TextComplexityLevel, SentenceStructureInternal>>;
963
+
964
+ /**
965
+ * Grade Level Appropriateness Evaluator
966
+ *
967
+ * Evaluates whether AI-generated text is suitable for a given grade band.
968
+ * Uses a structured 4-step analysis process:
969
+ * 1. Quantitative analysis (word count, Flesch-Kincaid)
970
+ * 2. Qualitative complexity (text structure, language, purpose, knowledge demands)
971
+ * 3. Background knowledge assessment
972
+ * 4. Synthesis and final recommendation
973
+ *
974
+ * Returns:
975
+ * - Target grade band (K-1, 2-3, 4-5, 6-8, 9-10, 11-CCR)
976
+ * - Alternative grade band (with scaffolding)
977
+ * - Specific scaffolding recommendations
978
+ *
979
+ * @example
980
+ * ```typescript
981
+ * const evaluator = new GradeLevelAppropriatenessEvaluator({
982
+ * googleApiKey: process.env.GOOGLE_API_KEY
983
+ * });
984
+ *
985
+ * const result = await evaluator.evaluate(text);
986
+ * console.log(result.score); // e.g. "9-10"
987
+ * console.log(result._internal?.alternative_grade); // e.g. "6-8" (`_internal` is optional on EvaluationResult)
988
+ * console.log(result._internal?.scaffolding_needed);
989
+ * ```
990
+ */
991
+ declare class GradeLevelAppropriatenessEvaluator extends BaseEvaluator {
992
+ static readonly metadata: {
993
+ id: string;
994
+ name: string;
995
+ description: string;
996
+ supportedGrades: readonly [];
997
+ requiresGoogleKey: boolean;
998
+ requiresOpenAIKey: boolean;
999
+ };
1000
+ private provider;
1001
+ constructor(config: BaseEvaluatorConfig);
1002
+ /**
1003
+ * Evaluate grade level appropriateness for a given text
1004
+ *
1005
+ * @param text - The text to evaluate
1006
+ * @returns Evaluation result whose score is a GradeBand, with grade recommendations and scaffolding suggestions
1007
+ * @throws {ValidationError} If text is empty or too short/long
1008
+ * @throws {APIError} If LLM API calls fail (includes AuthenticationError, RateLimitError, NetworkError, TimeoutError)
1009
+ */
1010
+ evaluate(text: string): Promise<EvaluationResult<GradeBand, GradeLevelAppropriatenessInternal>>;
1011
+ }
1012
+ /**
1013
+ * Functional API for grade level appropriateness evaluation: takes no grade argument and resolves to a result whose score is a GradeBand.
1014
+ *
1015
+ * @example
1016
+ * ```typescript
1017
+ * const result = await evaluateGradeLevelAppropriateness(
1018
+ * "Tides are the rise and fall of sea levels...",
1019
+ * {
1020
+ * googleApiKey: process.env.GOOGLE_API_KEY
1021
+ * }
1022
+ * );
1023
+ * ```
1024
+ */
1025
+ declare function evaluateGradeLevelAppropriateness(text: string, config: BaseEvaluatorConfig): Promise<EvaluationResult<GradeBand, GradeLevelAppropriatenessInternal>>;
1026
+
1027
+ /**
1028
+ * Result map returned by TextComplexityEvaluator.
1029
+ * Each key holds the full evaluation result from its sub-evaluator, or `{ error: Error }` if that sub-evaluator failed.
1030
+ */
1031
+ interface TextComplexityResult {
1032
+ vocabulary: EvaluationResult<TextComplexityLevel, VocabularyInternal> | {
1033
+ error: Error;
1034
+ };
1035
+ sentenceStructure: EvaluationResult<TextComplexityLevel, SentenceStructureInternal> | {
1036
+ error: Error;
1037
+ };
1038
+ }
1039
+ /**
1040
+ * Text Complexity Evaluator
1041
+ *
1042
+ * Composite evaluator that analyzes both vocabulary and sentence structure complexity.
1043
+ * Runs both evaluations in parallel with concurrency control to avoid rate limiting.
1044
+ *
1045
+ * Uses:
1046
+ * - VocabularyEvaluator (Google Gemini 2.5 Pro + OpenAI GPT-4o; NOTE(review): the VocabularyEvaluator docs name GPT-4.1 — confirm which model is used)
1047
+ * - SentenceStructureEvaluator (OpenAI GPT-4o)
1048
+ *
1049
+ * @example
1050
+ * ```typescript
1051
+ * const evaluator = new TextComplexityEvaluator({
1052
+ * googleApiKey: process.env.GOOGLE_API_KEY,
1053
+ * openaiApiKey: process.env.OPENAI_API_KEY
1054
+ * });
1055
+ *
1056
+ * const result = await evaluator.evaluate(text, "5");
1057
+ * if (!('error' in result.vocabulary)) {
1058
+ * console.log(result.vocabulary.score); // e.g. "Moderately complex"
1059
+ * }
1060
+ * ```
1061
+ */
1062
+ declare class TextComplexityEvaluator extends BaseEvaluator {
1063
+ static readonly metadata: {
1064
+ id: string;
1065
+ name: string;
1066
+ description: string;
1067
+ supportedGrades: readonly ["3", "4", "5", "6", "7", "8", "9", "10", "11", "12"];
1068
+ requiresGoogleKey: boolean;
1069
+ requiresOpenAIKey: boolean;
1070
+ };
1071
+ private vocabularyEvaluator;
1072
+ private sentenceStructureEvaluator;
1073
+ private limit;
1074
+ constructor(config: BaseEvaluatorConfig);
1075
+ /**
1076
+ * Evaluate text complexity for a given text and grade level
1077
+ *
1078
+ * Runs vocabulary and sentence structure evaluations in parallel with concurrency control.
1079
+ * If both sub-evaluators fail, throws an error. Otherwise returns a result map where
1080
+ * failed sub-evaluators are represented as `{ error: Error }`.
1081
+ *
1082
+ * @param text - The text to evaluate
1083
+ * @param grade - The target grade level as a string, "3" through "12" (see metadata.supportedGrades)
1084
+ * @returns Map of sub-evaluator results, keyed `vocabulary` and `sentenceStructure`
1085
+ * @throws {ValidationError} If text is empty or grade is invalid
1086
+ * @throws {Error} If all sub-evaluators fail
1087
+ */
1088
+ evaluate(text: string, grade: string): Promise<TextComplexityResult>;
1089
+ /**
1090
+ * Run a sub-evaluator with error handling.
1091
+ * Returns the evaluation result or `{ error: Error }` if the evaluator throws.
1092
+ */
1093
+ private runSubEvaluator;
1094
+ }
1095
+ /**
1096
+ * Functional API for text complexity evaluation: resolves to a TextComplexityResult.
1097
+ *
1098
+ * @example
1099
+ * ```typescript
1100
+ * const result = await evaluateTextComplexity(
1101
+ * "The cat sat on the mat.",
1102
+ * "5",
1103
+ * {
1104
+ * googleApiKey: process.env.GOOGLE_API_KEY,
1105
+ * openaiApiKey: process.env.OPENAI_API_KEY
1106
+ * }
1107
+ * );
1108
+ * ```
1109
+ */
1110
+ declare function evaluateTextComplexity(text: string, grade: string, config: BaseEvaluatorConfig): Promise<TextComplexityResult>;
1111
+
1112
+ /**
1113
+ * Calculate the Flesch-Kincaid Grade Level of `text`, returned as a number
1114
+ * Equivalent to Python's textstat.flesch_kincaid_grade()
1115
+ */
1116
+ declare function calculateFleschKincaidGrade(text: string): number;
1117
+ /**
1118
+ * Additional readability metrics, as returned by calculateReadabilityMetrics: raw counts, per-sentence and per-word averages, and the Flesch-Kincaid grade
1119
+ */
1120
+ interface ReadabilityMetrics {
1121
+ sentenceCount: number;
1122
+ wordCount: number;
1123
+ characterCount: number;
1124
+ syllableCount: number;
1125
+ avgWordsPerSentence: number;
1126
+ avgSyllablesPerWord: number;
1127
+ fleschKincaidGrade: number;
1128
+ }
1129
+ declare function calculateReadabilityMetrics(text: string): ReadabilityMetrics;
1130
+
1131
+ /**
1132
+ * Add engineered features to sentence analysis output, producing a SentenceFeatures value
1133
+ * Ported from Python add_engineered_features function
1134
+ */
1135
+ declare function addEngineeredFeatures(analysis: SentenceAnalysis): SentenceFeatures;
1136
+ /**
1137
+ * Convert sentence features to JSON string for LLM prompt
1138
+ * Ported from Python row_to_features_json; `decimals` and `castToInt` are optional (their defaults are not visible in this declaration)
1139
+ */
1140
+ declare function featuresToJSON(features: SentenceFeatures, decimals?: number, castToInt?: boolean): string;
1141
+
1142
+ export { APIError, AuthenticationError, type BaseEvaluatorConfig, type ComplexityClassification, ComplexityClassificationSchema, ConfigurationError, type EvaluationError, type EvaluationMetadata, type EvaluationResult, EvaluatorError, type EvaluatorMetadata, GradeBand, GradeLevelAppropriatenessEvaluator, type GradeLevelAppropriatenessInternal, GradeLevelAppropriatenessSchema, type LLMProvider, type LLMRequest, type LLMResponse, type LogContext, LogLevel, type Logger, type Message, NetworkError, type ProviderConfig, RateLimitError, type ReadabilityMetrics, type SentenceAnalysis, SentenceAnalysisSchema, type SentenceFeatures, SentenceStructureEvaluator, type SentenceStructureInternal, type TelemetryOptions, TextComplexityEvaluator, TextComplexityLevel, type TextComplexityResult, type TextGenerationResponse, TimeoutError, ValidationError, VocabularyEvaluator, type VocabularyInternal, addEngineeredFeatures, calculateFleschKincaidGrade, calculateReadabilityMetrics, evaluateGradeLevelAppropriateness, evaluateSentenceStructure, evaluateTextComplexity, evaluateVocabulary, featuresToJSON };