@learning-commons/evaluators 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +52 -0
- package/README.md +191 -45
- package/dist/base-DKcAYXfb.d.cts +464 -0
- package/dist/base-DKcAYXfb.d.ts +464 -0
- package/dist/batch/cli.js +4326 -0
- package/dist/batch/cli.js.map +1 -0
- package/dist/batch/index.cjs +3989 -0
- package/dist/batch/index.cjs.map +1 -0
- package/dist/batch/index.d.cts +146 -0
- package/dist/batch/index.d.ts +146 -0
- package/dist/batch/index.js +3958 -0
- package/dist/batch/index.js.map +1 -0
- package/dist/index.cjs +610 -213
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +153 -414
- package/dist/index.d.ts +153 -414
- package/dist/index.js +606 -214
- package/dist/index.js.map +1 -1
- package/package.json +35 -11
- package/src/batch/README.md +166 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@learning-commons/evaluators",
|
|
3
|
-
"version": "0.3.0",
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"description": "TypeScript SDK for Learning Commons educational evaluators",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"types": "./dist/index.d.ts",
|
|
@@ -9,15 +9,27 @@
|
|
|
9
9
|
"types": "./dist/index.d.ts",
|
|
10
10
|
"import": "./dist/index.js",
|
|
11
11
|
"require": "./dist/index.cjs"
|
|
12
|
+
},
|
|
13
|
+
"./batch": {
|
|
14
|
+
"types": "./dist/batch/index.d.ts",
|
|
15
|
+
"import": "./dist/batch/index.js",
|
|
16
|
+
"require": "./dist/batch/index.cjs"
|
|
12
17
|
}
|
|
13
18
|
},
|
|
14
19
|
"sideEffects": false,
|
|
20
|
+
"bin": {
|
|
21
|
+
"evaluators-batch": "./dist/batch/cli.js"
|
|
22
|
+
},
|
|
15
23
|
"files": [
|
|
16
24
|
"dist",
|
|
17
25
|
"README.md",
|
|
18
|
-
"
|
|
26
|
+
"CHANGELOG.md",
|
|
27
|
+
"LICENSE",
|
|
28
|
+
"src/batch/README.md"
|
|
19
29
|
],
|
|
20
30
|
"scripts": {
|
|
31
|
+
"generate:schemas": "tsx scripts/generate-schema.ts ../../evals/prompts/purpose/config.json",
|
|
32
|
+
"generate:schemas:check": "tsx scripts/generate-schema.ts --check ../../evals/prompts/purpose/config.json",
|
|
21
33
|
"build": "tsup",
|
|
22
34
|
"dev": "tsup --watch",
|
|
23
35
|
"test": "vitest run",
|
|
@@ -29,7 +41,7 @@
|
|
|
29
41
|
"test:coverage": "vitest run --coverage",
|
|
30
42
|
"test:ci": "npm run test:unit && npm run test:integration:dist",
|
|
31
43
|
"typecheck": "tsc --noEmit",
|
|
32
|
-
"lint": "eslint src --ext .ts",
|
|
44
|
+
"lint": "eslint src tests --ext .ts",
|
|
33
45
|
"prepublishOnly": "npm run build"
|
|
34
46
|
},
|
|
35
47
|
"keywords": [
|
|
@@ -48,33 +60,45 @@
|
|
|
48
60
|
},
|
|
49
61
|
"homepage": "https://github.com/learning-commons-org/evaluators#readme",
|
|
50
62
|
"peerDependencies": {
|
|
51
|
-
"ai": ">=
|
|
52
|
-
"@ai-sdk/openai": ">=3.0.0",
|
|
63
|
+
"@ai-sdk/anthropic": ">=3.0.0",
|
|
53
64
|
"@ai-sdk/google": ">=3.0.0",
|
|
54
|
-
"@ai-sdk/
|
|
65
|
+
"@ai-sdk/openai": ">=3.0.0",
|
|
66
|
+
"ai": ">=6.0.0"
|
|
55
67
|
},
|
|
56
68
|
"peerDependenciesMeta": {
|
|
57
|
-
"@ai-sdk/openai": {
|
|
58
|
-
|
|
59
|
-
|
|
69
|
+
"@ai-sdk/openai": {
|
|
70
|
+
"optional": true
|
|
71
|
+
},
|
|
72
|
+
"@ai-sdk/google": {
|
|
73
|
+
"optional": true
|
|
74
|
+
},
|
|
75
|
+
"@ai-sdk/anthropic": {
|
|
76
|
+
"optional": true
|
|
77
|
+
}
|
|
60
78
|
},
|
|
61
79
|
"dependencies": {
|
|
62
80
|
"compromise": "^14.13.0",
|
|
63
|
-
"
|
|
81
|
+
"csv-parse": "^6.1.0",
|
|
82
|
+
"p-limit": "^7.3.0",
|
|
83
|
+
"prompts": "^2.4.2",
|
|
64
84
|
"syllable": "^5.0.1",
|
|
85
|
+
"text-readability": "^1.1.1",
|
|
65
86
|
"zod": "^3.22.4"
|
|
66
87
|
},
|
|
67
88
|
"devDependencies": {
|
|
68
89
|
"@ai-sdk/anthropic": "^3.0.12",
|
|
69
90
|
"@ai-sdk/google": "^3.0.7",
|
|
70
91
|
"@ai-sdk/openai": "^3.0.9",
|
|
71
|
-
"@types/node": "^
|
|
92
|
+
"@types/node": "^25.6.0",
|
|
93
|
+
"@types/prompts": "^2.4.9",
|
|
72
94
|
"@typescript-eslint/eslint-plugin": "^6.19.0",
|
|
73
95
|
"@typescript-eslint/parser": "^6.19.0",
|
|
74
96
|
"@vitest/coverage-v8": "^4.0.17",
|
|
75
97
|
"ai": "^6.0.30",
|
|
76
98
|
"eslint": "^8.56.0",
|
|
99
|
+
"json-schema-to-zod": "^2.8.1",
|
|
77
100
|
"tsup": "^8.0.1",
|
|
101
|
+
"tsx": "^4.21.0",
|
|
78
102
|
"typescript": "^5.3.3",
|
|
79
103
|
"vitest": "^4.0.17"
|
|
80
104
|
},
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# Batch CSV Evaluator
|
|
2
|
+
|
|
3
|
+
Evaluate multiple texts from a CSV file using a group of evaluators, with results output in CSV and HTML formats.
|
|
4
|
+
|
|
5
|
+
## Usage
|
|
6
|
+
|
|
7
|
+
### Installation
|
|
8
|
+
|
|
9
|
+
After publishing to npm:
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
# Install globally
|
|
13
|
+
npm install -g @learning-commons/evaluators
|
|
14
|
+
|
|
15
|
+
# Or run directly with npx
|
|
16
|
+
npx --package=@learning-commons/evaluators evaluators-batch
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
### Interactive Mode
|
|
20
|
+
|
|
21
|
+
Run the batch evaluator interactively from any directory:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
# If installed globally
|
|
25
|
+
evaluators-batch
|
|
26
|
+
|
|
27
|
+
# Or with npx
|
|
28
|
+
npx --package=@learning-commons/evaluators evaluators-batch
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
**Important:** Run this command from the directory containing your CSV file, or provide an absolute path to your CSV.
|
|
32
|
+
|
|
33
|
+
The CLI will guide you through:
|
|
34
|
+
1. **CSV File Path**: Location of your input CSV file
|
|
35
|
+
2. **API Keys**: Enter required API keys (only prompted for keys the group requires)
|
|
36
|
+
3. **Output Directory**: Where to save results (default: timestamped folder in current directory)
|
|
37
|
+
4. **Confirmation**: Review summary before starting
|
|
38
|
+
|
|
39
|
+
The output directory is automatically created with a human-readable timestamp:
|
|
40
|
+
```
|
|
41
|
+
batch-results-2024-02-07_14-30-22/
|
|
42
|
+
├── results.csv
|
|
43
|
+
└── results.html
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Input CSV Format
|
|
47
|
+
|
|
48
|
+
Your CSV must have a `text` column and a `grade` column (column names are matched case-insensitively). Any additional columns are preserved as-is in the output.
|
|
49
|
+
|
|
50
|
+
Example `input.csv`:
|
|
51
|
+
```csv
|
|
52
|
+
text,grade
|
|
53
|
+
"The cat sat on the mat.",3
|
|
54
|
+
"Photosynthesis is the process by which plants convert sunlight into energy.",5
|
|
55
|
+
"The mitochondria are the powerhouse of the cell.",8
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Any additional columns beyond `text` and `grade` are preserved as-is in the output.
|
|
59
|
+
|
|
60
|
+
### Evaluator Groups
|
|
61
|
+
|
|
62
|
+
The batch evaluator runs a fixed group of evaluators together. The current available group is:
|
|
63
|
+
|
|
64
|
+
- **text-complexity**: Runs grade-level appropriateness, subject matter knowledge, vocabulary complexity, sentence structure, conventionality, and purpose evaluators together (requires both Google and OpenAI API keys). Maximum 50 input rows. If you exceed the limit, the CLI will exit with an error and suggest splitting into smaller batches.
|
|
65
|
+
|
|
66
|
+
### Output Files
|
|
67
|
+
|
|
68
|
+
Two files are generated:
|
|
69
|
+
|
|
70
|
+
1. **CSV** (`results.csv`):
|
|
71
|
+
- Spreadsheet-compatible format
|
|
72
|
+
- Original CSV columns preserved, followed by `{evaluator}_score`, `{evaluator}_reasoning`, and `{evaluator}_status` columns for each evaluator
|
|
73
|
+
|
|
74
|
+
2. **HTML** (`results.html`):
|
|
75
|
+
- Summary dashboard with grade-level distribution and text complexity charts
|
|
76
|
+
- Full results table with per-evaluator scores and reasoning
|
|
77
|
+
- Opens automatically in your default browser after evaluation completes
|
|
78
|
+
|
|
79
|
+
During evaluation, real-time progress is displayed:
|
|
80
|
+
|
|
81
|
+
```
|
|
82
|
+
Processing evaluations...
|
|
83
|
+
████████████░░░░░░░░ 60% (30/50)
|
|
84
|
+
✓ grade-level-appropriateness: 6/10 successful
|
|
85
|
+
✓ subject-matter-knowledge: 6/10 successful
|
|
86
|
+
✓ vocabulary: 6/10 successful
|
|
87
|
+
✓ sentence-structure: 6/10 successful
|
|
88
|
+
⏳ conventionality: 6/10 successful
|
|
89
|
+
|
|
90
|
+
⏱ Elapsed: 2m 15s | Estimated remaining: 1m 30s
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### CLI Flags
|
|
94
|
+
|
|
95
|
+
You can override defaults by passing flags when running the command:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
evaluators-batch --concurrency 5 --max-retries 3 --no-telemetry
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
| Flag | Default | Description |
|
|
102
|
+
|---|---|---|
|
|
103
|
+
| `--concurrency <n>` | `3` | Number of evaluations to run in parallel |
|
|
104
|
+
| `--max-retries <n>` | `2` | Number of times to retry a failed evaluation |
|
|
105
|
+
| `--no-telemetry` | telemetry on | Disable telemetry reporting |
|
|
106
|
+
|
|
107
|
+
### API Keys
|
|
108
|
+
|
|
109
|
+
You can provide API keys in two ways:
|
|
110
|
+
1. **Environment variables**: `GOOGLE_API_KEY`, `OPENAI_API_KEY` — used as defaults in the prompts
|
|
111
|
+
2. **Interactive prompts**: Enter when prompted (keys are masked)
|
|
112
|
+
|
|
113
|
+
### Graceful Shutdown
|
|
114
|
+
|
|
115
|
+
Press `Ctrl+C` during evaluation to gracefully shut down:
|
|
116
|
+
|
|
117
|
+
1. **In-flight tasks complete**: Running evaluations finish processing
|
|
118
|
+
2. **New tasks cancelled**: Pending tasks are skipped
|
|
119
|
+
3. **Partial results saved**: All completed results are saved to `results-partial.*` files
|
|
120
|
+
4. **Progress preserved**: No loss of work done so far
|
|
121
|
+
|
|
122
|
+
Example:
|
|
123
|
+
```bash
|
|
124
|
+
# Press Ctrl+C during a long batch evaluation
|
|
125
|
+
|
|
126
|
+
⚠️ Shutdown requested. Saving partial results...
|
|
127
|
+
(Press Ctrl+C again to force quit)
|
|
128
|
+
|
|
129
|
+
✓ Saved 15 results to:
|
|
130
|
+
./batch-results-2024-02-07_14-30-22/
|
|
131
|
+
├── results-partial.csv
|
|
132
|
+
└── results-partial.html
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Press `Ctrl+C` twice to force quit immediately (not recommended — may lose in-flight results).
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## Development & Testing
|
|
140
|
+
|
|
141
|
+
### Running Locally (Before Publishing)
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
# From the SDK root directory
|
|
145
|
+
cd sdks/typescript
|
|
146
|
+
|
|
147
|
+
# Build the project
|
|
148
|
+
npm run build
|
|
149
|
+
|
|
150
|
+
# Run the batch CLI directly
|
|
151
|
+
node dist/batch/cli.js
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Testing the Package Locally
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
# Build and pack
|
|
158
|
+
npm run build
|
|
159
|
+
npm pack
|
|
160
|
+
# Creates: learning-commons-evaluators-x.x.x.tgz
|
|
161
|
+
|
|
162
|
+
# Test installation in another directory
|
|
163
|
+
cd /tmp
|
|
164
|
+
npm install /path/to/learning-commons-evaluators-x.x.x.tgz
|
|
165
|
+
npx evaluators-batch
|
|
166
|
+
```
|