@learning-commons/evaluators 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/README.md +188 -45
- package/dist/{base-Ced9oKKa.d.cts → base-DKcAYXfb.d.cts} +142 -9
- package/dist/{base-Ced9oKKa.d.ts → base-DKcAYXfb.d.ts} +142 -9
- package/dist/batch/cli.js +635 -227
- package/dist/batch/cli.js.map +1 -1
- package/dist/batch/index.cjs +618 -218
- package/dist/batch/index.cjs.map +1 -1
- package/dist/batch/index.d.cts +3 -1
- package/dist/batch/index.d.ts +3 -1
- package/dist/batch/index.js +617 -218
- package/dist/batch/index.js.map +1 -1
- package/dist/index.cjs +626 -217
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +155 -86
- package/dist/index.d.ts +155 -86
- package/dist/index.js +622 -218
- package/dist/index.js.map +1 -1
- package/package.json +13 -4
- package/src/batch/README.md +14 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@learning-commons/evaluators",
|
|
3
|
-
"version": "0.4.0",
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"description": "TypeScript SDK for Learning Commons educational evaluators",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"types": "./dist/index.d.ts",
|
|
@@ -28,6 +28,8 @@
|
|
|
28
28
|
"src/batch/README.md"
|
|
29
29
|
],
|
|
30
30
|
"scripts": {
|
|
31
|
+
"generate:schemas": "tsx scripts/generate-schema.ts ../../evals/prompts/purpose/config.json",
|
|
32
|
+
"generate:schemas:check": "tsx scripts/generate-schema.ts --check ../../evals/prompts/purpose/config.json",
|
|
31
33
|
"build": "tsup",
|
|
32
34
|
"dev": "tsup --watch",
|
|
33
35
|
"test": "vitest run",
|
|
@@ -48,6 +50,10 @@
|
|
|
48
50
|
],
|
|
49
51
|
"author": "Learning Commons",
|
|
50
52
|
"license": "MIT",
|
|
53
|
+
"publishConfig": {
|
|
54
|
+
"access": "public",
|
|
55
|
+
"registry": "https://registry.npmjs.org"
|
|
56
|
+
},
|
|
51
57
|
"repository": {
|
|
52
58
|
"type": "git",
|
|
53
59
|
"url": "git+https://github.com/learning-commons-org/evaluators.git",
|
|
@@ -56,7 +62,7 @@
|
|
|
56
62
|
"bugs": {
|
|
57
63
|
"url": "https://github.com/learning-commons-org/evaluators/issues"
|
|
58
64
|
},
|
|
59
|
-
"homepage": "https://github.com/learning-commons-org/evaluators#readme",
|
|
65
|
+
"homepage": "https://github.com/learning-commons-org/evaluators/tree/main/sdks/typescript#readme",
|
|
60
66
|
"peerDependencies": {
|
|
61
67
|
"@ai-sdk/anthropic": ">=3.0.0",
|
|
62
68
|
"@ai-sdk/google": ">=3.0.0",
|
|
@@ -77,23 +83,26 @@
|
|
|
77
83
|
"dependencies": {
|
|
78
84
|
"compromise": "^14.13.0",
|
|
79
85
|
"csv-parse": "^6.1.0",
|
|
80
|
-
"p-limit": "^
|
|
86
|
+
"p-limit": "^7.3.0",
|
|
81
87
|
"prompts": "^2.4.2",
|
|
82
88
|
"syllable": "^5.0.1",
|
|
89
|
+
"text-readability": "^1.1.1",
|
|
83
90
|
"zod": "^3.22.4"
|
|
84
91
|
},
|
|
85
92
|
"devDependencies": {
|
|
86
93
|
"@ai-sdk/anthropic": "^3.0.12",
|
|
87
94
|
"@ai-sdk/google": "^3.0.7",
|
|
88
95
|
"@ai-sdk/openai": "^3.0.9",
|
|
89
|
-
"@types/node": "^
|
|
96
|
+
"@types/node": "^25.6.0",
|
|
90
97
|
"@types/prompts": "^2.4.9",
|
|
91
98
|
"@typescript-eslint/eslint-plugin": "^6.19.0",
|
|
92
99
|
"@typescript-eslint/parser": "^6.19.0",
|
|
93
100
|
"@vitest/coverage-v8": "^4.0.17",
|
|
94
101
|
"ai": "^6.0.30",
|
|
95
102
|
"eslint": "^8.56.0",
|
|
103
|
+
"json-schema-to-zod": "^2.8.1",
|
|
96
104
|
"tsup": "^8.0.1",
|
|
105
|
+
"tsx": "^4.21.0",
|
|
97
106
|
"typescript": "^5.3.3",
|
|
98
107
|
"vitest": "^4.0.17"
|
|
99
108
|
},
|
package/src/batch/README.md
CHANGED
|
@@ -61,7 +61,7 @@ Any additional columns beyond `text` and `grade` are preserved as-is in the outp
|
|
|
61
61
|
|
|
62
62
|
The batch evaluator runs a fixed group of evaluators together. The current available group is:
|
|
63
63
|
|
|
64
|
-
- **text-complexity**: Runs grade-level appropriateness, subject matter knowledge, vocabulary complexity, sentence structure, and
|
|
64
|
+
- **text-complexity**: Runs grade-level appropriateness, subject matter knowledge, vocabulary complexity, sentence structure, conventionality, and purpose evaluators together (requires both Google and OpenAI API keys). Maximum 50 input rows. If you exceed the limit, the CLI will exit with an error and suggest splitting into smaller batches. The limit can be bypassed with `--bypass-row-limit` — see [CLI Flags](#cli-flags) below.
|
|
65
65
|
|
|
66
66
|
### Output Files
|
|
67
67
|
|
|
@@ -103,6 +103,19 @@ evaluators-batch --concurrency 5 --max-retries 3 --no-telemetry
|
|
|
103
103
|
| `--concurrency <n>` | `3` | Number of evaluations to run in parallel |
|
|
104
104
|
| `--max-retries <n>` | `2` | Number of times to retry a failed evaluation |
|
|
105
105
|
| `--no-telemetry` | telemetry on | Disable telemetry reporting |
|
|
106
|
+
| `--bypass-row-limit` | off | Skip the per-group input row limit (50). Use with caution: longer runs increase the risk of provider throttling. |
|
|
107
|
+
|
|
108
|
+
#### When to bypass the row limit
|
|
109
|
+
|
|
110
|
+
The 50-row default keeps runs short and reduces the risk of provider throttling on the underlying Google and OpenAI calls. If you understand those risks and want to process a larger CSV in one run, pass `--bypass-row-limit` on the CLI or `bypassRowLimit: true` when constructing a `BatchEvaluator` programmatically:
|
|
111
|
+
|
|
112
|
+
```ts
|
|
113
|
+
const evaluator = new BatchEvaluator({
|
|
114
|
+
googleApiKey,
|
|
115
|
+
openaiApiKey,
|
|
116
|
+
bypassRowLimit: true,
|
|
117
|
+
});
|
|
118
|
+
```
|
|
106
119
|
|
|
107
120
|
### API Keys
|
|
108
121
|
|