@learning-commons/evaluators 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +44 -0
- package/README.md +86 -4
- package/dist/base-Ced9oKKa.d.cts +331 -0
- package/dist/base-Ced9oKKa.d.ts +331 -0
- package/dist/batch/cli.js +3940 -0
- package/dist/batch/cli.js.map +1 -0
- package/dist/batch/index.cjs +3602 -0
- package/dist/batch/index.cjs.map +1 -0
- package/dist/batch/index.d.cts +145 -0
- package/dist/batch/index.d.ts +145 -0
- package/dist/batch/index.js +3572 -0
- package/dist/batch/index.js.map +1 -0
- package/dist/index.cjs +225 -7
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +93 -329
- package/dist/index.d.ts +93 -329
- package/dist/index.js +224 -8
- package/dist/index.js.map +1 -1
- package/package.json +28 -9
- package/src/batch/README.md +166 -0
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import { T as TelemetryOptions } from '../base-Ced9oKKa.cjs';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Batch evaluation types
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Input row from CSV.
 *
 * Holds the row's `text` and `grade` values together with its index and the
 * original row record.
 */
interface BatchInput {
    text: string;
    /** Grade value, typed as a string. */
    grade: string;
    /** Index of the row; whether it is 0- or 1-based is not visible here. */
    rowIndex: number;
    /** The original row as a string-keyed record of unknown values. */
    originalRow: Record<string, unknown>;
}
|
|
16
|
+
/**
 * Result from a single evaluation.
 *
 * `status` is either `'success'` or `'error'`. `score`, `reasoning` and
 * `error` are each optional in the type, so callers must check for
 * `undefined` before using them.
 */
interface BatchResult {
    rowIndex: number;
    text: string;
    grade: string;
    /** Evaluator ID this result is associated with. */
    evaluatorId: string;
    status: 'success' | 'error';
    score?: string;
    reasoning?: string;
    error?: string;
    /** Processing time; the `Ms` suffix indicates milliseconds. */
    processingTimeMs: number;
    originalRow: Record<string, unknown>;
}
|
|
31
|
+
/**
 * Summary statistics for batch evaluation.
 *
 * Carries overall task counts and a duration, plus a per-key breakdown of
 * successful and failed counts.
 */
interface BatchSummary {
    totalTasks: number;
    successful: number;
    failed: number;
    /** Duration; the `Ms` suffix indicates milliseconds. */
    durationMs: number;
    /** Success/failure counts per key — presumably keyed by evaluator ID; confirm against the producer. */
    resultsPerEvaluator: Record<string, {
        successful: number;
        failed: number;
    }>;
}
|
|
44
|
+
/**
 * Complete batch evaluation output: the individual results plus their summary.
 */
interface BatchOutput {
    results: BatchResult[];
    summary: BatchSummary;
}
|
|
51
|
+
/**
 * A named group of evaluators that run together and share an HTML report format.
 * This is the unit of selection exposed to users.
 */
interface EvaluatorGroup {
    id: string;
    name: string;
    description: string;
    /** IDs of the evaluators that belong to this group (read-only array) */
    evaluatorIds: readonly string[];
    requiresGoogleKey: boolean;
    requiresOpenAIKey: boolean;
    /** Maximum number of input rows allowed for this group */
    maxInputRows: number;
}
|
|
66
|
+
/**
 * Configuration for batch evaluation.
 *
 * Every field is optional. `telemetry` accepts either a boolean or a
 * `TelemetryOptions` object.
 */
interface BatchConfig {
    googleApiKey?: string;
    openaiApiKey?: string;
    concurrency?: number;
    maxRetries?: number;
    telemetry?: boolean | TelemetryOptions;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Returns the available evaluator groups.
 *
 * @returns An array of `EvaluatorGroup` descriptors
 */
declare function getAvailableGroups(): EvaluatorGroup[];
|
|
81
|
+
/**
 * Batch evaluator class
 *
 * Processes multiple texts in parallel using all evaluators in a group.
 */
declare class BatchEvaluator {
    private config;
    private limit;
    private evaluatorInstances;
    private isCancelled;
    private completedResults;
    /**
     * @param config - Batch configuration (see `BatchConfig`)
     */
    constructor(config: BatchConfig);
    /**
     * Cancel ongoing evaluation.
     * Returns partial results collected so far.
     */
    cancel(): BatchResult[];
    /**
     * Initialize evaluator instances for the given IDs
     */
    private initializeEvaluators;
    /**
     * Create tasks from inputs and evaluator IDs
     */
    private createTasks;
    /**
     * Execute a single evaluation task
     */
    private executeTask;
    /**
     * Calculate summary statistics
     */
    private calculateSummary;
    /**
     * Run batch evaluation for an evaluator group.
     *
     * @param inputs - Array of input rows
     * @param groupId - The evaluator group to run (see getAvailableGroups())
     * @param onProgress - Optional callback invoked after each task completes
     * @returns Batch evaluation results and summary
     */
    evaluate(inputs: BatchInput[], groupId: string, onProgress?: (result: BatchResult) => void): Promise<BatchOutput>;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Parse a CSV file into BatchInput rows.
 *
 * Requires columns named "text" and "grade" (case-insensitive, whitespace-trimmed).
 * Rows missing either value are silently skipped.
 *
 * @param csvPath - Path of the CSV file to parse
 * @returns The parsed rows as `BatchInput` objects
 * @throws {Error} If the file does not exist, is empty, or is missing required columns
 */
declare function parseCSV(csvPath: string): BatchInput[];
|
|
134
|
+
|
|
135
|
+
/**
 * Takes a complete batch output and returns a string.
 * NOTE(review): by its name the string is CSV text — confirm the exact layout against the implementation.
 */
declare function formatAsCSV(output: BatchOutput): string;
|
|
136
|
+
/**
 * Report metadata; this is the type of the `meta` argument of `formatAsHTML`.
 */
interface ReportMeta {
    csvPath: string;
    groupId: string;
    reportId: string;
    /** Generation time as a `Date` object. */
    generatedAt: Date;
    totalInputRows: number;
}
|
|
143
|
+
/**
 * Takes a complete batch output plus report metadata and returns a string.
 * NOTE(review): by its name the string is an HTML document — confirm against the implementation.
 */
declare function formatAsHTML(output: BatchOutput, meta: ReportMeta): string;
|
|
144
|
+
|
|
145
|
+
export { type BatchConfig, BatchEvaluator, type BatchInput, type BatchOutput, type BatchResult, type BatchSummary, type EvaluatorGroup, type ReportMeta, formatAsCSV, formatAsHTML, getAvailableGroups, parseCSV };
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import { T as TelemetryOptions } from '../base-Ced9oKKa.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Batch evaluation types
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Input row from CSV.
 *
 * Holds the row's `text` and `grade` values together with its index and the
 * original row record.
 */
interface BatchInput {
    text: string;
    /** Grade value, typed as a string. */
    grade: string;
    /** Index of the row; whether it is 0- or 1-based is not visible here. */
    rowIndex: number;
    /** The original row as a string-keyed record of unknown values. */
    originalRow: Record<string, unknown>;
}
|
|
16
|
+
/**
 * Result from a single evaluation.
 *
 * `status` is either `'success'` or `'error'`. `score`, `reasoning` and
 * `error` are each optional in the type, so callers must check for
 * `undefined` before using them.
 */
interface BatchResult {
    rowIndex: number;
    text: string;
    grade: string;
    /** Evaluator ID this result is associated with. */
    evaluatorId: string;
    status: 'success' | 'error';
    score?: string;
    reasoning?: string;
    error?: string;
    /** Processing time; the `Ms` suffix indicates milliseconds. */
    processingTimeMs: number;
    originalRow: Record<string, unknown>;
}
|
|
31
|
+
/**
 * Summary statistics for batch evaluation.
 *
 * Carries overall task counts and a duration, plus a per-key breakdown of
 * successful and failed counts.
 */
interface BatchSummary {
    totalTasks: number;
    successful: number;
    failed: number;
    /** Duration; the `Ms` suffix indicates milliseconds. */
    durationMs: number;
    /** Success/failure counts per key — presumably keyed by evaluator ID; confirm against the producer. */
    resultsPerEvaluator: Record<string, {
        successful: number;
        failed: number;
    }>;
}
|
|
44
|
+
/**
 * Complete batch evaluation output: the individual results plus their summary.
 */
interface BatchOutput {
    results: BatchResult[];
    summary: BatchSummary;
}
|
|
51
|
+
/**
 * A named group of evaluators that run together and share an HTML report format.
 * This is the unit of selection exposed to users.
 */
interface EvaluatorGroup {
    id: string;
    name: string;
    description: string;
    /** IDs of the evaluators that belong to this group (read-only array) */
    evaluatorIds: readonly string[];
    requiresGoogleKey: boolean;
    requiresOpenAIKey: boolean;
    /** Maximum number of input rows allowed for this group */
    maxInputRows: number;
}
|
|
66
|
+
/**
 * Configuration for batch evaluation.
 *
 * Every field is optional. `telemetry` accepts either a boolean or a
 * `TelemetryOptions` object.
 */
interface BatchConfig {
    googleApiKey?: string;
    openaiApiKey?: string;
    concurrency?: number;
    maxRetries?: number;
    telemetry?: boolean | TelemetryOptions;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Returns the available evaluator groups.
 *
 * @returns An array of `EvaluatorGroup` descriptors
 */
declare function getAvailableGroups(): EvaluatorGroup[];
|
|
81
|
+
/**
 * Batch evaluator class
 *
 * Processes multiple texts in parallel using all evaluators in a group.
 */
declare class BatchEvaluator {
    private config;
    private limit;
    private evaluatorInstances;
    private isCancelled;
    private completedResults;
    /**
     * @param config - Batch configuration (see `BatchConfig`)
     */
    constructor(config: BatchConfig);
    /**
     * Cancel ongoing evaluation.
     * Returns partial results collected so far.
     */
    cancel(): BatchResult[];
    /**
     * Initialize evaluator instances for the given IDs
     */
    private initializeEvaluators;
    /**
     * Create tasks from inputs and evaluator IDs
     */
    private createTasks;
    /**
     * Execute a single evaluation task
     */
    private executeTask;
    /**
     * Calculate summary statistics
     */
    private calculateSummary;
    /**
     * Run batch evaluation for an evaluator group.
     *
     * @param inputs - Array of input rows
     * @param groupId - The evaluator group to run (see getAvailableGroups())
     * @param onProgress - Optional callback invoked after each task completes
     * @returns Batch evaluation results and summary
     */
    evaluate(inputs: BatchInput[], groupId: string, onProgress?: (result: BatchResult) => void): Promise<BatchOutput>;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Parse a CSV file into BatchInput rows.
 *
 * Requires columns named "text" and "grade" (case-insensitive, whitespace-trimmed).
 * Rows missing either value are silently skipped.
 *
 * @param csvPath - Path of the CSV file to parse
 * @returns The parsed rows as `BatchInput` objects
 * @throws {Error} If the file does not exist, is empty, or is missing required columns
 */
declare function parseCSV(csvPath: string): BatchInput[];
|
|
134
|
+
|
|
135
|
+
/**
 * Takes a complete batch output and returns a string.
 * NOTE(review): by its name the string is CSV text — confirm the exact layout against the implementation.
 */
declare function formatAsCSV(output: BatchOutput): string;
|
|
136
|
+
/**
 * Report metadata; this is the type of the `meta` argument of `formatAsHTML`.
 */
interface ReportMeta {
    csvPath: string;
    groupId: string;
    reportId: string;
    /** Generation time as a `Date` object. */
    generatedAt: Date;
    totalInputRows: number;
}
|
|
143
|
+
/**
 * Takes a complete batch output plus report metadata and returns a string.
 * NOTE(review): by its name the string is an HTML document — confirm against the implementation.
 */
declare function formatAsHTML(output: BatchOutput, meta: ReportMeta): string;
|
|
144
|
+
|
|
145
|
+
export { type BatchConfig, BatchEvaluator, type BatchInput, type BatchOutput, type BatchResult, type BatchSummary, type EvaluatorGroup, type ReportMeta, formatAsCSV, formatAsHTML, getAvailableGroups, parseCSV };
|