@learning-commons/evaluators 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@learning-commons/evaluators",
3
- "version": "0.3.0",
3
+ "version": "0.5.0",
4
4
  "description": "TypeScript SDK for Learning Commons educational evaluators",
5
5
  "type": "module",
6
6
  "types": "./dist/index.d.ts",
@@ -9,15 +9,27 @@
9
9
  "types": "./dist/index.d.ts",
10
10
  "import": "./dist/index.js",
11
11
  "require": "./dist/index.cjs"
12
+ },
13
+ "./batch": {
14
+ "types": "./dist/batch/index.d.ts",
15
+ "import": "./dist/batch/index.js",
16
+ "require": "./dist/batch/index.cjs"
12
17
  }
13
18
  },
14
19
  "sideEffects": false,
20
+ "bin": {
21
+ "evaluators-batch": "./dist/batch/cli.js"
22
+ },
15
23
  "files": [
16
24
  "dist",
17
25
  "README.md",
18
- "LICENSE"
26
+ "CHANGELOG.md",
27
+ "LICENSE",
28
+ "src/batch/README.md"
19
29
  ],
20
30
  "scripts": {
31
+ "generate:schemas": "tsx scripts/generate-schema.ts ../../evals/prompts/purpose/config.json",
32
+ "generate:schemas:check": "tsx scripts/generate-schema.ts --check ../../evals/prompts/purpose/config.json",
21
33
  "build": "tsup",
22
34
  "dev": "tsup --watch",
23
35
  "test": "vitest run",
@@ -29,7 +41,7 @@
29
41
  "test:coverage": "vitest run --coverage",
30
42
  "test:ci": "npm run test:unit && npm run test:integration:dist",
31
43
  "typecheck": "tsc --noEmit",
32
- "lint": "eslint src --ext .ts",
44
+ "lint": "eslint src tests --ext .ts",
33
45
  "prepublishOnly": "npm run build"
34
46
  },
35
47
  "keywords": [
@@ -48,33 +60,45 @@
48
60
  },
49
61
  "homepage": "https://github.com/learning-commons-org/evaluators#readme",
50
62
  "peerDependencies": {
51
- "ai": ">=6.0.0",
52
- "@ai-sdk/openai": ">=3.0.0",
63
+ "@ai-sdk/anthropic": ">=3.0.0",
53
64
  "@ai-sdk/google": ">=3.0.0",
54
- "@ai-sdk/anthropic": ">=3.0.0"
65
+ "@ai-sdk/openai": ">=3.0.0",
66
+ "ai": ">=6.0.0"
55
67
  },
56
68
  "peerDependenciesMeta": {
57
- "@ai-sdk/openai": { "optional": true },
58
- "@ai-sdk/google": { "optional": true },
59
- "@ai-sdk/anthropic": { "optional": true }
69
+ "@ai-sdk/openai": {
70
+ "optional": true
71
+ },
72
+ "@ai-sdk/google": {
73
+ "optional": true
74
+ },
75
+ "@ai-sdk/anthropic": {
76
+ "optional": true
77
+ }
60
78
  },
61
79
  "dependencies": {
62
80
  "compromise": "^14.13.0",
63
- "p-limit": "^5.0.0",
81
+ "csv-parse": "^6.1.0",
82
+ "p-limit": "^7.3.0",
83
+ "prompts": "^2.4.2",
64
84
  "syllable": "^5.0.1",
85
+ "text-readability": "^1.1.1",
65
86
  "zod": "^3.22.4"
66
87
  },
67
88
  "devDependencies": {
68
89
  "@ai-sdk/anthropic": "^3.0.12",
69
90
  "@ai-sdk/google": "^3.0.7",
70
91
  "@ai-sdk/openai": "^3.0.9",
71
- "@types/node": "^20.11.5",
92
+ "@types/node": "^25.6.0",
93
+ "@types/prompts": "^2.4.9",
72
94
  "@typescript-eslint/eslint-plugin": "^6.19.0",
73
95
  "@typescript-eslint/parser": "^6.19.0",
74
96
  "@vitest/coverage-v8": "^4.0.17",
75
97
  "ai": "^6.0.30",
76
98
  "eslint": "^8.56.0",
99
+ "json-schema-to-zod": "^2.8.1",
77
100
  "tsup": "^8.0.1",
101
+ "tsx": "^4.21.0",
78
102
  "typescript": "^5.3.3",
79
103
  "vitest": "^4.0.17"
80
104
  },
@@ -0,0 +1,166 @@
1
+ # Batch CSV Evaluator
2
+
3
+ Evaluate multiple texts from a CSV file using a group of evaluators, with results output in CSV and HTML formats.
4
+
5
+ ## Usage
6
+
7
+ ### Installation
8
+
9
+ Install the package from npm:
10
+
11
+ ```bash
12
+ # Install globally
13
+ npm install -g @learning-commons/evaluators
14
+
15
+ # Or run directly with npx
16
+ npx --package @learning-commons/evaluators evaluators-batch
17
+ ```
18
+
19
+ ### Interactive Mode
20
+
21
+ Run the batch evaluator interactively from any directory:
22
+
23
+ ```bash
24
+ # If installed globally
25
+ evaluators-batch
26
+
27
+ # Or with npx
28
+ npx --package @learning-commons/evaluators evaluators-batch
29
+ ```
30
+
31
+ **Important:** Run this command from the directory containing your CSV file, or provide an absolute path to your CSV.
32
+
33
+ The CLI will guide you through:
34
+ 1. **CSV File Path**: Location of your input CSV file
35
+ 2. **API Keys**: Enter required API keys (only prompted for keys the group requires)
36
+ 3. **Output Directory**: Where to save results (default: timestamped folder in current directory)
37
+ 4. **Confirmation**: Review summary before starting
38
+
39
+ The output directory is automatically created with a human-readable timestamp:
40
+ ```
41
+ batch-results-2024-02-07_14-30-22/
42
+ ├── results.csv
43
+ └── results.html
44
+ ```
45
+
46
+ ### Input CSV Format
47
+
48
+ Your CSV must have a `text` column and a `grade` column (column names are matched case-insensitively).
49
+
50
+ Example `input.csv`:
51
+ ```csv
52
+ text,grade
53
+ "The cat sat on the mat.",3
54
+ "Photosynthesis is the process by which plants convert sunlight into energy.",5
55
+ "The mitochondria are the powerhouse of the cell.",8
56
+ ```
57
+
58
+ Any additional columns beyond `text` and `grade` are preserved as-is in the output.
59
+
60
+ ### Evaluator Groups
61
+
62
+ The batch evaluator runs a fixed group of evaluators together. The current available group is:
63
+
64
+ - **text-complexity**: Runs grade-level appropriateness, subject matter knowledge, vocabulary complexity, sentence structure, conventionality, and purpose evaluators together (requires both Google and OpenAI API keys). Maximum 50 input rows. If you exceed the limit, the CLI will exit with an error and suggest splitting into smaller batches.
65
+
66
+ ### Output Files
67
+
68
+ Two files are generated:
69
+
70
+ 1. **CSV** (`results.csv`):
71
+ - Spreadsheet-compatible format
72
+ - Original CSV columns preserved, followed by `{evaluator}_score`, `{evaluator}_reasoning`, and `{evaluator}_status` columns for each evaluator
73
+
74
+ 2. **HTML** (`results.html`):
75
+ - Summary dashboard with grade-level distribution and text complexity charts
76
+ - Full results table with per-evaluator scores and reasoning
77
+ - Opens automatically in your default browser after evaluation completes
78
+
79
+ During evaluation, real-time progress is displayed:
80
+
81
+ ```
82
+ Processing evaluations...
83
+ ████████████░░░░░░░░ 60% (30/50)
84
+ ✓ grade-level-appropriateness: 6/10 successful
85
+ ✓ subject-matter-knowledge: 6/10 successful
86
+ ✓ vocabulary: 6/10 successful
87
+ ✓ sentence-structure: 6/10 successful
88
+ ⏳ conventionality: 6/10 successful
89
+
90
+ ⏱ Elapsed: 2m 15s | Estimated remaining: 1m 30s
91
+ ```
92
+
93
+ ### CLI Flags
94
+
95
+ You can override defaults by passing flags when running the command:
96
+
97
+ ```bash
98
+ evaluators-batch --concurrency 5 --max-retries 3 --no-telemetry
99
+ ```
100
+
101
+ | Flag | Default | Description |
102
+ |---|---|---|
103
+ | `--concurrency <n>` | `3` | Number of evaluations to run in parallel |
104
+ | `--max-retries <n>` | `2` | Number of times to retry a failed evaluation |
105
+ | `--no-telemetry` | telemetry on | Disable telemetry reporting |
106
+
107
+ ### API Keys
108
+
109
+ You can provide API keys in two ways:
110
+ 1. **Environment variables**: `GOOGLE_API_KEY`, `OPENAI_API_KEY` — used as defaults in the prompts
111
+ 2. **Interactive prompts**: Enter when prompted (keys are masked)
112
+
113
+ ### Graceful Shutdown
114
+
115
+ Press `Ctrl+C` during evaluation to gracefully shut down:
116
+
117
+ 1. **In-flight tasks complete**: Running evaluations finish processing
118
+ 2. **New tasks cancelled**: Pending tasks are skipped
119
+ 3. **Partial results saved**: All completed results are saved to `results-partial.*` files
120
+ 4. **Progress preserved**: No loss of work done so far
121
+
122
+ Example:
123
+ ```
124
+ # Press Ctrl+C during a long batch evaluation
125
+
126
+ ⚠️ Shutdown requested. Saving partial results...
127
+ (Press Ctrl+C again to force quit)
128
+
129
+ ✓ Saved 15 results to:
130
+ ./batch-results-2024-02-07_14-30-22/
131
+ ├── results-partial.csv
132
+ └── results-partial.html
133
+ ```
134
+
135
+ Press `Ctrl+C` twice to force quit immediately (not recommended — may lose in-flight results).
136
+
137
+ ---
138
+
139
+ ## Development & Testing
140
+
141
+ ### Running Locally (Before Publishing)
142
+
143
+ ```bash
144
+ # From the repository root, change into the TypeScript SDK directory
145
+ cd sdks/typescript
146
+
147
+ # Build the project
148
+ npm run build
149
+
150
+ # Run the batch CLI directly
151
+ node dist/batch/cli.js
152
+ ```
153
+
154
+ ### Testing the Package Locally
155
+
156
+ ```bash
157
+ # Build and pack
158
+ npm run build
159
+ npm pack
160
+ # Creates: learning-commons-evaluators-x.x.x.tgz
161
+
162
+ # Test installation in another directory
163
+ cd /tmp
164
+ npm install /path/to/learning-commons-evaluators-x.x.x.tgz
165
+ npx evaluators-batch
166
+ ```