@learning-commons/evaluators 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +12 -0
- package/dist/batch/cli.js +39 -17
- package/dist/batch/cli.js.map +1 -1
- package/dist/batch/index.cjs +21 -8
- package/dist/batch/index.cjs.map +1 -1
- package/dist/batch/index.d.cts +1 -0
- package/dist/batch/index.d.ts +1 -0
- package/dist/batch/index.js +21 -8
- package/dist/batch/index.js.map +1 -1
- package/dist/index.cjs +18 -6
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +18 -6
- package/dist/index.js.map +1 -1
- package/package.json +6 -2
- package/src/batch/README.md +14 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to the `@learning-commons/evaluators` TypeScript SDK will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.6.0](https://github.com/learning-commons-org/evaluators/compare/sdks-typescript-v0.5.0...sdks-typescript-v0.6.0) (2026-05-22)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
### Features
|
|
9
|
+
|
|
10
|
+
* **ts-sdk:** add bypassRowLimit option for batch evaluator ([#77](https://github.com/learning-commons-org/evaluators/issues/77)) ([902a60f](https://github.com/learning-commons-org/evaluators/commit/902a60fc934372a151f1d40c0b49ef3313d12609))
|
|
11
|
+
* **ts-sdk:** expose per-call token usage on EvaluationMetadata ([#59](https://github.com/learning-commons-org/evaluators/issues/59)) ([3c8fa0f](https://github.com/learning-commons-org/evaluators/commit/3c8fa0fd8e2389fc902c9cf1f63985b40d2e4b2c))
|
|
12
|
+
|
|
5
13
|
## [0.5.0](https://github.com/learning-commons-org/evaluators/compare/sdks-typescript-v0.4.0...sdks-typescript-v0.5.0) (2026-05-07)
|
|
6
14
|
|
|
7
15
|
|
package/README.md
CHANGED
|
@@ -70,6 +70,8 @@ await evaluator.evaluate(text: string, grade: string)
|
|
|
70
70
|
metadata: {
|
|
71
71
|
model: string;
|
|
72
72
|
processingTimeMs: number;
|
|
73
|
+
inputTokens: number;
|
|
74
|
+
outputTokens: number;
|
|
73
75
|
};
|
|
74
76
|
_internal: VocabularyInternal; // Detailed analysis
|
|
75
77
|
}
|
|
@@ -110,6 +112,8 @@ await evaluator.evaluate(text: string, grade: string)
|
|
|
110
112
|
metadata: {
|
|
111
113
|
model: string;
|
|
112
114
|
processingTimeMs: number;
|
|
115
|
+
inputTokens: number;
|
|
116
|
+
outputTokens: number;
|
|
113
117
|
};
|
|
114
118
|
_internal: {
|
|
115
119
|
sentenceAnalysis: SentenceAnalysis;
|
|
@@ -154,6 +158,8 @@ await evaluator.evaluate(text: string, grade: string)
|
|
|
154
158
|
metadata: {
|
|
155
159
|
model: string;
|
|
156
160
|
processingTimeMs: number;
|
|
161
|
+
inputTokens: number;
|
|
162
|
+
outputTokens: number;
|
|
157
163
|
};
|
|
158
164
|
_internal: {
|
|
159
165
|
identified_topics: string[];
|
|
@@ -218,6 +224,8 @@ await evaluator.evaluate(text: string, grade: string)
|
|
|
218
224
|
metadata: {
|
|
219
225
|
model: string;
|
|
220
226
|
processingTimeMs: number;
|
|
227
|
+
inputTokens: number;
|
|
228
|
+
outputTokens: number;
|
|
221
229
|
};
|
|
222
230
|
_internal: {
|
|
223
231
|
conventionality_features: string[];
|
|
@@ -346,6 +354,8 @@ await evaluator.evaluate(text: string)
|
|
|
346
354
|
metadata: {
|
|
347
355
|
model: string;
|
|
348
356
|
processingTimeMs: number;
|
|
357
|
+
inputTokens: number;
|
|
358
|
+
outputTokens: number;
|
|
349
359
|
};
|
|
350
360
|
_internal: {
|
|
351
361
|
grade: string;
|
|
@@ -391,6 +401,8 @@ await evaluator.evaluate(text: string, grade: string)
|
|
|
391
401
|
metadata: {
|
|
392
402
|
model: string;
|
|
393
403
|
processingTimeMs: number;
|
|
404
|
+
inputTokens: number;
|
|
405
|
+
outputTokens: number;
|
|
394
406
|
};
|
|
395
407
|
_internal: {
|
|
396
408
|
complexity_score: 'slightly_complex' | 'moderately_complex' | 'very_complex' | 'exceedingly_complex' | 'more_context_needed';
|
package/dist/batch/cli.js
CHANGED
|
@@ -1279,7 +1279,9 @@ var VocabularyEvaluator = class _VocabularyEvaluator extends BaseEvaluator {
|
|
|
1279
1279
|
reasoning: complexityResponse.data.reasoning,
|
|
1280
1280
|
metadata: {
|
|
1281
1281
|
model: modelLabel,
|
|
1282
|
-
processingTimeMs: latencyMs
|
|
1282
|
+
processingTimeMs: latencyMs,
|
|
1283
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
1284
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
1283
1285
|
},
|
|
1284
1286
|
_internal: complexityResponse.data
|
|
1285
1287
|
};
|
|
@@ -1609,7 +1611,9 @@ var SentenceStructureEvaluator = class _SentenceStructureEvaluator extends BaseE
|
|
|
1609
1611
|
reasoning: complexityResponse.data.reasoning,
|
|
1610
1612
|
metadata: {
|
|
1611
1613
|
model: this.provider.label,
|
|
1612
|
-
processingTimeMs: latencyMs
|
|
1614
|
+
processingTimeMs: latencyMs,
|
|
1615
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
1616
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
1613
1617
|
},
|
|
1614
1618
|
_internal: {
|
|
1615
1619
|
sentenceAnalysis: analysisResponse.data,
|
|
@@ -1815,7 +1819,9 @@ var GradeLevelAppropriatenessEvaluator = class extends BaseEvaluator {
|
|
|
1815
1819
|
reasoning: response.data.reasoning,
|
|
1816
1820
|
metadata: {
|
|
1817
1821
|
model: this.provider.label,
|
|
1818
|
-
processingTimeMs: latencyMs
|
|
1822
|
+
processingTimeMs: latencyMs,
|
|
1823
|
+
inputTokens: tokenUsage.input_tokens,
|
|
1824
|
+
outputTokens: tokenUsage.output_tokens
|
|
1819
1825
|
},
|
|
1820
1826
|
_internal: response.data
|
|
1821
1827
|
};
|
|
@@ -2017,7 +2023,9 @@ var SmkEvaluator = class _SmkEvaluator extends BaseEvaluator {
|
|
|
2017
2023
|
reasoning: response.data.reasoning,
|
|
2018
2024
|
metadata: {
|
|
2019
2025
|
model: this.provider.label,
|
|
2020
|
-
processingTimeMs: latencyMs
|
|
2026
|
+
processingTimeMs: latencyMs,
|
|
2027
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
2028
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
2021
2029
|
},
|
|
2022
2030
|
_internal: response.data
|
|
2023
2031
|
};
|
|
@@ -2223,7 +2231,9 @@ var ConventionalityEvaluator = class _ConventionalityEvaluator extends BaseEvalu
|
|
|
2223
2231
|
reasoning: response.data.reasoning,
|
|
2224
2232
|
metadata: {
|
|
2225
2233
|
model: this.provider.label,
|
|
2226
|
-
processingTimeMs: latencyMs
|
|
2234
|
+
processingTimeMs: latencyMs,
|
|
2235
|
+
inputTokens: totalTokenUsage.input_tokens,
|
|
2236
|
+
outputTokens: totalTokenUsage.output_tokens
|
|
2227
2237
|
},
|
|
2228
2238
|
_internal: response.data
|
|
2229
2239
|
};
|
|
@@ -2498,7 +2508,9 @@ var PurposeEvaluator = class _PurposeEvaluator extends BaseEvaluator {
|
|
|
2498
2508
|
reasoning: response.data.reasoning,
|
|
2499
2509
|
metadata: {
|
|
2500
2510
|
model: this.provider.label,
|
|
2501
|
-
processingTimeMs: latencyMs
|
|
2511
|
+
processingTimeMs: latencyMs,
|
|
2512
|
+
inputTokens: tokenUsage.input_tokens,
|
|
2513
|
+
outputTokens: tokenUsage.output_tokens
|
|
2502
2514
|
},
|
|
2503
2515
|
_internal: response.data
|
|
2504
2516
|
};
|
|
@@ -2611,6 +2623,7 @@ var BatchEvaluator = class {
|
|
|
2611
2623
|
concurrency: 3,
|
|
2612
2624
|
maxRetries: 2,
|
|
2613
2625
|
telemetry: false,
|
|
2626
|
+
bypassRowLimit: false,
|
|
2614
2627
|
...config
|
|
2615
2628
|
};
|
|
2616
2629
|
this.limit = pLimit(this.config.concurrency);
|
|
@@ -2764,9 +2777,9 @@ var BatchEvaluator = class {
|
|
|
2764
2777
|
`Unknown evaluator group: "${groupId}". Available: ${EVALUATOR_GROUPS.map((g) => g.id).join(", ")}`
|
|
2765
2778
|
);
|
|
2766
2779
|
}
|
|
2767
|
-
if (inputs.length > group.maxInputRows) {
|
|
2780
|
+
if (!this.config.bypassRowLimit && inputs.length > group.maxInputRows) {
|
|
2768
2781
|
throw new Error(
|
|
2769
|
-
`Input exceeds limit for "${group.id}": ${inputs.length} rows (max ${group.maxInputRows}). Split into smaller batches.`
|
|
2782
|
+
`Input exceeds limit for "${group.id}": ${inputs.length} rows (max ${group.maxInputRows}). Split into smaller batches, or pass { bypassRowLimit: true } in BatchConfig to bypass (use --bypass-row-limit on the CLI).`
|
|
2770
2783
|
);
|
|
2771
2784
|
}
|
|
2772
2785
|
this.isCancelled = false;
|
|
@@ -4099,6 +4112,8 @@ function parseArgs() {
|
|
|
4099
4112
|
if (!isNaN(v) && v >= 0) result.maxRetries = v;
|
|
4100
4113
|
} else if (args[i] === "--no-telemetry") {
|
|
4101
4114
|
result.noTelemetry = true;
|
|
4115
|
+
} else if (args[i] === "--bypass-row-limit") {
|
|
4116
|
+
result.bypassRowLimit = true;
|
|
4102
4117
|
}
|
|
4103
4118
|
}
|
|
4104
4119
|
return result;
|
|
@@ -4132,16 +4147,22 @@ async function main() {
|
|
|
4132
4147
|
`);
|
|
4133
4148
|
const group = getAvailableGroups()[0];
|
|
4134
4149
|
console.log(`\u2713 Evaluator group: ${group.name}`);
|
|
4135
|
-
console.log(` ${group.description}
|
|
4136
|
-
console.log(` Row limit: ${group.maxInputRows}
|
|
4150
|
+
console.log(` ${group.description}
|
|
4137
4151
|
`);
|
|
4138
4152
|
if (inputs.length > group.maxInputRows) {
|
|
4139
|
-
|
|
4153
|
+
if (cliArgs.bypassRowLimit) {
|
|
4154
|
+
console.warn(`\u26A0\uFE0F Row limit bypassed: ${inputs.length} rows (default max ${group.maxInputRows}).`);
|
|
4155
|
+
console.warn(` Expect longer runtime and possible provider throttling.
|
|
4140
4156
|
`);
|
|
4141
|
-
|
|
4142
|
-
|
|
4143
|
-
|
|
4144
|
-
|
|
4157
|
+
} else {
|
|
4158
|
+
console.error(`\u274C Too many rows: ${inputs.length} (max ${group.maxInputRows} for this group)
|
|
4159
|
+
`);
|
|
4160
|
+
console.log("Suggestions:");
|
|
4161
|
+
console.log(` \u2022 Trim the CSV to ${group.maxInputRows} rows`);
|
|
4162
|
+
console.log(" \u2022 Split into multiple smaller batches");
|
|
4163
|
+
console.log(" \u2022 Re-run with --bypass-row-limit to skip this check (use with caution)\n");
|
|
4164
|
+
process.exit(1);
|
|
4165
|
+
}
|
|
4145
4166
|
}
|
|
4146
4167
|
let googleApiKey;
|
|
4147
4168
|
let openaiApiKey;
|
|
@@ -4218,7 +4239,7 @@ async function main() {
|
|
|
4218
4239
|
const totalTasks = inputs.length * group.evaluatorIds.length;
|
|
4219
4240
|
console.log(`
|
|
4220
4241
|
\u{1F4DD} Summary:`);
|
|
4221
|
-
console.log(` Input rows: ${inputs.length}`);
|
|
4242
|
+
console.log(` Input rows: ${inputs.length}${cliArgs.bypassRowLimit ? " (row limit bypassed)" : ""}`);
|
|
4222
4243
|
console.log(` Evaluators: ${group.evaluatorIds.length}`);
|
|
4223
4244
|
console.log(` Total tasks: ${totalTasks}`);
|
|
4224
4245
|
console.log(` Concurrency: ${cliArgs.concurrency ?? 3}`);
|
|
@@ -4243,7 +4264,8 @@ async function main() {
|
|
|
4243
4264
|
openaiApiKey,
|
|
4244
4265
|
concurrency: cliArgs.concurrency ?? 3,
|
|
4245
4266
|
maxRetries: cliArgs.maxRetries ?? 2,
|
|
4246
|
-
telemetry: !cliArgs.noTelemetry
|
|
4267
|
+
telemetry: !cliArgs.noTelemetry,
|
|
4268
|
+
bypassRowLimit: cliArgs.bypassRowLimit ?? false
|
|
4247
4269
|
});
|
|
4248
4270
|
let isShuttingDown = false;
|
|
4249
4271
|
const handleShutdown = () => {
|