@plaited/agent-eval-harness 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -48,7 +48,7 @@ Pre-built schemas are available in `.claude/skills/headless-adapters/schemas/` f
  | `extract <raw> --schema <path>` | Parse raw output into trajectories |
  | `grade <results> --grader <path>` | Apply grader to extracted results |
  | `format <results> --style <style>` | Convert to markdown, csv, or jsonl |
- | `compare <run1> <run2>... --grader <path>` | Compare multiple runs |
+ | `compare <run1> <run2>...` | Compare runs (aggregate report) |

  ### Examples

@@ -76,9 +76,8 @@ cat prompts.jsonl | \
  bunx @plaited/agent-eval-harness grade -g ./grader.ts | \
  bunx @plaited/agent-eval-harness format -f markdown > report.md

- # Compare multiple runs
- bunx @plaited/agent-eval-harness compare run1.jsonl run2.jsonl \
-   --grader ./compare-grader.ts -o comparison.jsonl
+ # Compare runs (built-in strategies: weighted, statistical, custom)
+ bunx @plaited/agent-eval-harness compare run1.jsonl run2.jsonl -o comparison.json
  ```

  ## Skills for AI Agents
@@ -117,7 +116,7 @@ CLI tool for capturing agent trajectories, optimized for TypeScript/JavaScript p
  | `extract` | Parse raw output into trajectories |
  | `grade` | Apply grader to extracted results |
  | `format` | Convert to markdown, csv, or jsonl |
- | `compare` | Compare multiple runs |
+ | `compare` | Compare runs (aggregate report) |

  **Use cases:**
  - Capturing trajectories for downstream evaluation (Braintrust, custom scorers)
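The README now describes `compare` as offering built-in strategies (weighted, statistical, custom). Going by the comparison grader modules added later in this diff, a custom strategy is a module that exports a `grade` function. A minimal sketch; the import path is an assumption and is not shown in this diff:

```ts
// compare-grader.ts - hypothetical custom comparison grader, modeled on the
// built-in graders added in this release (see the new grader files below).
// The import path is an assumption; it is not confirmed by this diff.
import type { ComparisonGrader } from '@plaited/agent-eval-harness'

export const grade: ComparisonGrader = async ({ runs }) => {
  // Rank runs by grader score alone, highest first
  const sorted = Object.entries(runs)
    .map(([label, run]) => ({ label, score: run.score?.score ?? 0 }))
    .sort((a, b) => b.score - a.score)

  return {
    rankings: sorted.map((s, i) => ({ run: s.label, rank: i + 1, score: s.score })),
    reasoning: 'Ranked by grader score only',
  }
}
```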
package/bin/cli.ts CHANGED
@@ -30,7 +30,6 @@ import { schemasCli } from '../src/schemas/schemas-cli.ts'
  const [command, ...args] = Bun.argv.slice(2)

  const printHelp = () => {
- // biome-ignore lint/suspicious/noConsole: CLI help output
  console.log(`
  agent-eval-harness - CLI tool for agent evaluation

@@ -144,7 +143,6 @@ const main = async () => {
  case '-v':
  case '--version': {
  const { version } = await import('../package.json')
- // biome-ignore lint/suspicious/noConsole: CLI version output
  console.log(version)
  break
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@plaited/agent-eval-harness",
- "version": "0.5.2",
+ "version": "0.6.0",
  "description": "CLI tool for capturing agent trajectories from headless CLI agents",
  "license": "ISC",
  "engines": {
@@ -171,7 +171,6 @@ export const runBalance = async (config: BalanceConfig): Promise<BalanceAnalysis
  if (outputPath) {
  await Bun.write(resolvePath(outputPath), output)
  } else {
- // biome-ignore lint/suspicious/noConsole: CLI stdout output
  console.log(output)
  }

@@ -216,7 +215,6 @@ export const balance = async (args: string[]): Promise<void> => {
  })

  if (values.help) {
- // biome-ignore lint/suspicious/noConsole: CLI help output
  console.log(`
  Usage: agent-eval-harness balance <prompts.jsonl> [options]

@@ -218,6 +218,7 @@ export const runCalibrate = async (config: CalibrateConfig): Promise<Calibration
  output: result.output,
  hint: result.hint,
  trajectory: result.trajectory,
+ metadata: result.metadata,
  })
  }

@@ -231,7 +232,6 @@ export const runCalibrate = async (config: CalibrateConfig): Promise<Calibration
  if (outputPath) {
  await Bun.write(resolvePath(outputPath), markdown)
  } else {
- // biome-ignore lint/suspicious/noConsole: CLI stdout output
  console.log(markdown)
  }

@@ -260,7 +260,6 @@ export const calibrate = async (args: string[]): Promise<void> => {
  })

  if (values.help) {
- // biome-ignore lint/suspicious/noConsole: CLI help output
  console.log(`
  Usage: agent-eval-harness calibrate <results.jsonl> [options]

@@ -230,6 +230,7 @@ export const runCapture = async (config: CaptureConfig): Promise<CaptureResult[]
  output,
  hint: promptCase.hint,
  trajectory,
+ metadata: promptCase.metadata,
  })
  }

@@ -309,7 +310,6 @@ export const capture = async (args: string[]): Promise<void> => {
  })

  if (values.help) {
- // biome-ignore lint/suspicious/noConsole: CLI help output
  console.log(`
  Usage: agent-eval-harness capture <prompts.jsonl> --schema <schema.json> [options]

@@ -42,7 +42,7 @@ export const formatSummary = (result: CaptureResult): SummaryResult => {
  id: result.id,
  input: inputText,
  output: result.output,
- toolCalls: result.trajectory.filter((s) => s.type === 'tool_call').map((s) => (s as { name: string }).name),
+ toolCalls: result.trajectory.flatMap((s) => (s.type === 'tool_call' ? [s.name] : [])),
  duration: result.timing.end - result.timing.start,
  }
  }
@@ -160,7 +160,6 @@ export const runSummarize = async (config: SummarizeConfig): Promise<string> =>
  if (outputPath) {
  await Bun.write(resolvePath(outputPath), output)
  } else {
- // biome-ignore lint/suspicious/noConsole: CLI stdout output
  console.log(output)
  }

@@ -188,7 +187,6 @@ export const summarize = async (args: string[]): Promise<void> => {
  })

  if (values.help) {
- // biome-ignore lint/suspicious/noConsole: CLI help output
  console.log(`
  Usage: agent-eval-harness summarize <results.jsonl> [options]

@@ -216,6 +216,7 @@ export const runTrials = async (config: TrialsConfig): Promise<TrialResult[]> =>
  output,
  hint: promptCase.hint,
  trajectory,
+ metadata: promptCase.metadata,
  })
  entry.pass = graderResult.pass
  entry.score = graderResult.score
@@ -310,7 +311,6 @@ export const trials = async (args: string[]): Promise<void> => {
  })

  if (values.help) {
- // biome-ignore lint/suspicious/noConsole: CLI help output
  console.log(`
  Usage: agent-eval-harness trials <prompts.jsonl> --schema <schema.json> [options]

@@ -71,6 +71,7 @@ export const runValidateRefs = async (config: ValidateRefsConfig): Promise<Valid
  output: prompt.reference as string,
  hint: prompt.hint,
  trajectory: [], // No trajectory for reference validation
+ metadata: prompt.metadata,
  })

  results.push({
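Each of the `metadata` additions in this release (calibrate, capture, trials, validate-refs) threads the optional `metadata` field from a prompt case through to the emitted results and grader payloads. A sketch of what such a prompt case might look like; only `hint` and `metadata` are confirmed by this diff, the remaining fields and keys are illustrative:

```ts
// Hypothetical prompt case: `hint` and `metadata` come straight from the
// promptCase usage shown above; `id`, `input`, and the metadata keys are
// assumptions made for illustration.
const promptCase = {
  id: 'case-001',
  input: 'Refactor the login handler to use async/await',
  hint: 'Should preserve existing error handling',
  metadata: { suite: 'refactoring', difficulty: 'medium' },
}
```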
@@ -91,7 +92,6 @@
  if (outputPath) {
  await Bun.write(resolvePath(outputPath), output)
  } else {
- // biome-ignore lint/suspicious/noConsole: CLI stdout output
  console.log(output)
  }

@@ -131,7 +131,6 @@ export const validateRefs = async (args: string[]): Promise<void> => {
  })

  if (values.help) {
- // biome-ignore lint/suspicious/noConsole: CLI help output
  console.log(`
  Usage: agent-eval-harness validate-refs <prompts.jsonl> --grader <grader.ts> [options]

package/src/core/core.ts CHANGED
@@ -11,7 +11,7 @@
  */

  // Loading utilities
- export { loadJsonl, loadPrompts, loadResults } from './loading.ts'
+ export { buildResultsIndex, countLines, loadJsonl, loadPrompts, loadResults, streamResults } from './loading.ts'
  // Output utilities
  export { getInputPreview, headTailPreview, logProgress, resolvePath, writeOutput } from './output.ts'
  // Trajectory utilities
@@ -94,3 +94,80 @@ export const loadJsonl = async <T = unknown>(path: string): Promise<T[]> => {
  }
  })
  }
+
+ // ============================================================================
+ // Streaming Loading
+ // ============================================================================
+
+ /**
+ * Stream capture results from a JSONL file.
+ *
+ * @remarks
+ * Memory-efficient alternative to loadResults for large files.
+ * Yields results one at a time using an async generator.
+ *
+ * @param path - Path to the results.jsonl file
+ * @yields Parsed and validated capture results
+ * @throws Error if file cannot be read or any line is invalid
+ *
+ * @public
+ */
+ export async function* streamResults(path: string): AsyncGenerator<CaptureResult, void, unknown> {
+ const file = Bun.file(path)
+ const text = await file.text()
+ const lines = text.split('\n')
+
+ for (let i = 0; i < lines.length; i++) {
+ const line = lines[i]?.trim()
+ if (!line) continue
+
+ try {
+ yield CaptureResultSchema.parse(JSON.parse(line))
+ } catch (error) {
+ throw new Error(`Invalid result at line ${i + 1}: ${error instanceof Error ? error.message : error}`)
+ }
+ }
+ }
+
+ /**
+ * Build an indexed map of results by ID using streaming.
+ *
+ * @remarks
+ * Memory-efficient for the compare command. Loads results into a Map
+ * keyed by ID for O(1) lookups without holding raw file content.
+ *
+ * For very large files (10k+ results), this is more memory-efficient than
+ * loading everything into an array and then building an index.
+ *
+ * @param path - Path to the results.jsonl file
+ * @returns Map of result ID to CaptureResult
+ *
+ * @public
+ */
+ export const buildResultsIndex = async (path: string): Promise<Map<string, CaptureResult>> => {
+ const index = new Map<string, CaptureResult>()
+
+ for await (const result of streamResults(path)) {
+ index.set(result.id, result)
+ }
+
+ return index
+ }
+
+ /**
+ * Count lines in a JSONL file without loading content.
+ *
+ * @remarks
+ * Useful for detecting large files that should use streaming mode.
+ * Uses byte-level scanning for efficiency.
+ *
+ * @param path - Path to the JSONL file
+ * @returns Number of non-empty lines
+ *
+ * @public
+ */
+ export const countLines = async (path: string): Promise<number> => {
+ const file = Bun.file(path)
+ const text = await file.text()
+ return text.split('\n').filter((line) => line.trim()).length
+ }
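A short sketch of how the new loading helpers fit together. They are also re-exported from src/core.ts (see the export changes below); the direct import path, file path, and the 10k threshold here are illustrative rather than prescribed by this diff:

```ts
// Illustrative usage of the new loading helpers added above.
// Importing from './src/core/loading.ts' is an assumption about the layout;
// the functions are also re-exported from the core module.
import { buildResultsIndex, countLines, streamResults } from './src/core/loading.ts'

const path = './results.jsonl'

// countLines reports non-empty lines, which can gate a streaming path
const total = await countLines(path)

if (total > 10_000) {
  // Stream results one at a time instead of loading the whole array
  for await (const result of streamResults(path)) {
    console.log(result.id)
  }
} else {
  // Build an id-keyed Map for O(1) lookups (as the compare command does)
  const index = await buildResultsIndex(path)
  console.log(index.size)
}
```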
@@ -35,7 +35,6 @@ export const writeOutput = async (line: string, outputPath?: string, append?: bo
  await Bun.write(outputPath, `${line}\n`)
  }
  } else {
- // biome-ignore lint/suspicious/noConsole: CLI stdout output
  console.log(line)
  }
  }
package/src/core.ts CHANGED
@@ -8,6 +8,9 @@
  */

  export {
+ // Loading
+ buildResultsIndex,
+ countLines,
  // Trajectory
  detectTrajectoryRichness,
  extractContent,
@@ -18,11 +21,11 @@ export {
  getInputPreview,
  hasToolErrors,
  headTailPreview,
- // Loading
  loadJsonl,
  loadPrompts,
  loadResults,
  logProgress,
  resolvePath,
+ streamResults,
  writeOutput,
  } from './core/core.ts'
@@ -0,0 +1,187 @@
+ /**
+ * Built-in statistical significance comparison grader.
+ *
+ * @remarks
+ * Uses bootstrap sampling to compute confidence intervals for score estimates.
+ * Flags when the winner is statistically significant (p<0.05, non-overlapping CIs).
+ *
+ * Bootstrap iterations can be customized via environment variable:
+ * - `COMPARE_BOOTSTRAP_ITERATIONS` (default: 1000)
+ *
+ * @packageDocumentation
+ */
+
+ import type { ComparisonGrader, ComparisonGraderInput, ComparisonGraderResult } from '../pipeline/pipeline.types.ts'
+
+ /** Default number of bootstrap iterations */
+ const DEFAULT_ITERATIONS = 1000
+
+ /**
+ * Bootstrap confidence interval result.
+ */
+ type BootstrapResult = {
+ /** Estimated mean from bootstrap */
+ mean: number
+ /** 95% confidence interval [lower, upper] */
+ ci95: [number, number]
+ }
+
+ /**
+ * Compute bootstrap confidence interval for sample mean.
+ *
+ * @remarks
+ * Bootstrap resampling provides robust confidence intervals without
+ * assuming a specific distribution. For small samples, it's more
+ * reliable than parametric methods.
+ *
+ * @param samples - Array of numeric samples
+ * @param iterations - Number of bootstrap iterations
+ * @returns Bootstrap mean and 95% confidence interval
+ */
+ const bootstrap = (samples: number[], iterations: number = DEFAULT_ITERATIONS): BootstrapResult => {
+ if (samples.length === 0) {
+ return { mean: 0, ci95: [0, 0] }
+ }
+
+ if (samples.length === 1) {
+ const value = samples[0] ?? 0
+ return { mean: value, ci95: [value, value] }
+ }
+
+ const means: number[] = []
+
+ for (let i = 0; i < iterations; i++) {
+ // Resample with replacement - we know samples.length > 1 at this point
+ const resampled = Array.from(
+ { length: samples.length },
+ () => samples[Math.floor(Math.random() * samples.length)] as number,
+ )
+
+ // Compute mean of resampled data
+ const sum = resampled.reduce((acc, val) => acc + val, 0)
+ means.push(sum / resampled.length)
+ }
+
+ // Sort means for percentile calculation
+ means.sort((a, b) => a - b)
+
+ // 95% CI: 2.5th and 97.5th percentiles
+ const lowerIdx = Math.floor(iterations * 0.025)
+ const upperIdx = Math.floor(iterations * 0.975)
+
+ return {
+ mean: means[Math.floor(iterations / 2)] ?? 0,
+ ci95: [means[lowerIdx] ?? 0, means[upperIdx] ?? 0],
+ }
+ }
+
+ /**
+ * Get bootstrap iterations from environment variable.
+ *
+ * @returns Number of bootstrap iterations
+ */
+ const getIterationsFromEnv = (): number => {
+ const envValue = process.env.COMPARE_BOOTSTRAP_ITERATIONS
+ if (!envValue) return DEFAULT_ITERATIONS
+
+ const parsed = Number.parseInt(envValue, 10)
+ return Number.isNaN(parsed) || parsed < 100 ? DEFAULT_ITERATIONS : parsed
+ }
+
+ /**
+ * Statistical significance comparison grader.
+ *
+ * @remarks
+ * Compares runs using bootstrap sampling to determine if differences
+ * are statistically significant. When confidence intervals don't overlap,
+ * the difference is flagged as significant (p<0.05).
+ *
+ * **Single-sample limitation:** When comparing individual prompts, each run
+ * provides only one score sample. Bootstrap with a single sample yields a
+ * degenerate CI of `[value, value]`. This grader is most useful when:
+ * - Aggregating results across multiple prompts
+ * - Using with the full comparison report (which combines per-prompt comparisons)
+ *
+ * For single-prompt comparisons, consider the weighted grader instead.
+ *
+ * @public
+ */
+ export const grade: ComparisonGrader = async ({ runs }: ComparisonGraderInput): Promise<ComparisonGraderResult> => {
+ const iterations = getIterationsFromEnv()
+
+ // Collect scores for each run
+ const runStats = Object.entries(runs).map(([label, run]) => {
+ // Use grader score if available, otherwise 0
+ const score = run.score?.score ?? 0
+
+ // For single-prompt comparison, we only have one sample
+ // In practice, this grader is most useful when aggregating across prompts
+ const stats = bootstrap([score], iterations)
+
+ return { label, score, stats }
+ })
+
+ // Sort by bootstrap mean descending
+ const sorted = runStats.sort((a, b) => b.stats.mean - a.stats.mean)
+
+ // Check if winner is statistically significant
+ // CIs don't overlap = significant difference (approximately p<0.05)
+ let isSignificant = false
+ const first = sorted[0]
+ const second = sorted[1]
+ if (first && second) {
+ // Non-overlapping: first's lower bound > second's upper bound
+ isSignificant = first.stats.ci95[0] > second.stats.ci95[1]
+ }
+
+ const reasoning = isSignificant
+ ? `Winner "${first?.label}" is statistically significant (p<0.05, non-overlapping 95% CIs)`
+ : 'No statistically significant difference between top runs (overlapping 95% CIs)'
+
+ return {
+ rankings: sorted.map((s, i) => ({
+ run: s.label,
+ rank: i + 1,
+ score: s.stats.mean,
+ })),
+ reasoning,
+ }
+ }
+
+ /**
+ * Create a statistical grader with custom iteration count.
+ *
+ * @param iterations - Number of bootstrap iterations
+ * @returns Comparison grader function
+ *
+ * @public
+ */
+ export const createStatisticalGrader = (iterations: number = DEFAULT_ITERATIONS): ComparisonGrader => {
+ return async ({ runs }: ComparisonGraderInput): Promise<ComparisonGraderResult> => {
+ const runStats = Object.entries(runs).map(([label, run]) => {
+ const score = run.score?.score ?? 0
+ const stats = bootstrap([score], iterations)
+ return { label, score, stats }
+ })
+
+ const sorted = runStats.sort((a, b) => b.stats.mean - a.stats.mean)
+
+ let isSignificant = false
+ const first = sorted[0]
+ const second = sorted[1]
+ if (first && second) {
+ isSignificant = first.stats.ci95[0] > second.stats.ci95[1]
+ }
+
+ return {
+ rankings: sorted.map((s, i) => ({
+ run: s.label,
+ rank: i + 1,
+ score: s.stats.mean,
+ })),
+ reasoning: isSignificant
+ ? `Winner "${first?.label}" is statistically significant (p<0.05)`
+ : 'No statistically significant difference between top runs',
+ }
+ }
+ }
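A usage sketch for the statistical grader; the import path and run labels are assumptions (the new file's location is not shown in this diff), and the input object fills in only the field this grader actually reads:

```ts
// Illustrative: exercise createStatisticalGrader directly.
// The import path is an assumption; this diff does not show where the file lives.
import { createStatisticalGrader } from './statistical-grader.ts'

// 5000 bootstrap iterations instead of the default 1000
const grade = createStatisticalGrader(5000)

// Only run.score?.score is read by this grader; other ComparisonGraderInput
// fields are omitted here, hence the cast below.
const input = {
  runs: {
    baseline: { score: { score: 0.62 } },
    candidate: { score: { score: 0.78 } },
  },
}

// Cast hedges over any ComparisonGraderInput fields this sketch omits
const { rankings, reasoning } = await grade(input as unknown as Parameters<typeof grade>[0])
console.log(rankings) // ordered best-first by bootstrap mean
console.log(reasoning)
```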
@@ -0,0 +1,112 @@
+ /**
+ * Built-in weighted multi-dimensional comparison grader.
+ *
+ * @remarks
+ * Configurable weights for quality, latency, and reliability.
+ * Default strategy when no `--grader` is specified for the compare command.
+ *
+ * Weights can be customized via environment variables:
+ * - `COMPARE_QUALITY` (default: 0.5)
+ * - `COMPARE_LATENCY` (default: 0.3)
+ * - `COMPARE_RELIABILITY` (default: 0.2)
+ *
+ * @packageDocumentation
+ */
+
+ import type { ComparisonGrader, ComparisonGraderInput, ComparisonGraderResult } from '../pipeline/pipeline.types.ts'
+
+ /**
+ * Weight configuration for comparison dimensions.
+ */
+ export type Weights = {
+ /** Weight for quality (pass/score) - how much correctness matters */
+ quality: number
+ /** Weight for latency - how much speed matters */
+ latency: number
+ /** Weight for reliability - how much error-free execution matters */
+ reliability: number
+ }
+
+ /** Default weights: quality=0.5, latency=0.3, reliability=0.2 */
+ export const DEFAULT_WEIGHTS: Weights = {
+ quality: 0.5,
+ latency: 0.3,
+ reliability: 0.2,
+ }
+
+ /**
+ * Read weights from environment variables with fallback to defaults.
+ *
+ * @returns Weights configuration
+ */
+ export const getWeightsFromEnv = (): Weights => {
+ const quality = Number.parseFloat(process.env.COMPARE_QUALITY ?? String(DEFAULT_WEIGHTS.quality))
+ const latency = Number.parseFloat(process.env.COMPARE_LATENCY ?? String(DEFAULT_WEIGHTS.latency))
+ const reliability = Number.parseFloat(process.env.COMPARE_RELIABILITY ?? String(DEFAULT_WEIGHTS.reliability))
+
+ return {
+ quality: Number.isNaN(quality) ? DEFAULT_WEIGHTS.quality : quality,
+ latency: Number.isNaN(latency) ? DEFAULT_WEIGHTS.latency : latency,
+ reliability: Number.isNaN(reliability) ? DEFAULT_WEIGHTS.reliability : reliability,
+ }
+ }
+
+ /**
+ * Create a weighted comparison grader with custom weights.
+ *
+ * @param weights - Weight configuration for comparison dimensions
+ * @returns Comparison grader function
+ *
+ * @public
+ */
+ export const createWeightedGrader = (weights: Weights = DEFAULT_WEIGHTS): ComparisonGrader => {
+ return async ({ runs }: ComparisonGraderInput): Promise<ComparisonGraderResult> => {
+ const scores = Object.entries(runs).map(([label, run]) => {
+ // Quality score: use grader score if available, otherwise 0
+ // Note: run.score is only present if the result was graded
+ const qualityScore = run.score?.score ?? 0
+
+ // Latency score: inverse relationship (faster = better)
+ // Normalize: 1 / (1 + duration/1000) gives ~0.5 at 1s, ~0.1 at 10s
+ const duration = run.duration ?? 10000
+ const latencyScore = 1 / (1 + duration / 1000)
+
+ // Reliability score: 1 if no errors, 0 if errors
+ const hasErrors = run.toolErrors ?? false
+ const reliabilityScore = hasErrors ? 0 : 1
+
+ // Weighted combination
+ const weighted =
+ qualityScore * weights.quality + latencyScore * weights.latency + reliabilityScore * weights.reliability
+
+ return { label, weighted, qualityScore, latencyScore, reliabilityScore }
+ })
+
+ // Sort by weighted score descending (highest = best)
+ const sorted = scores.sort((a, b) => b.weighted - a.weighted)
+
+ return {
+ rankings: sorted.map((s, i) => ({
+ run: s.label,
+ rank: i + 1,
+ score: s.weighted,
+ })),
+ reasoning: `Weighted: quality=${weights.quality}, latency=${weights.latency}, reliability=${weights.reliability}`,
+ }
+ }
+ }
+
+ /**
+ * Default weighted comparison grader using environment or default weights.
+ *
+ * @remarks
+ * This is the default grader used when `--strategy weighted` is specified
+ * or when no strategy is specified for the compare command.
+ *
+ * @public
+ */
+ export const grade: ComparisonGrader = async (input: ComparisonGraderInput): Promise<ComparisonGraderResult> => {
+ const weights = getWeightsFromEnv()
+ const grader = createWeightedGrader(weights)
+ return grader(input)
+ }
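And a matching sketch for the weighted strategy, which is what the compare command falls back to when no custom grader is supplied; the import path and run values are illustrative:

```ts
// Illustrative: build a weighted grader that leans harder on latency than the
// defaults (quality=0.5, latency=0.3, reliability=0.2). Import path assumed.
import { createWeightedGrader, type Weights } from './weighted-grader.ts'

const weights: Weights = { quality: 0.4, latency: 0.4, reliability: 0.2 }
const grade = createWeightedGrader(weights)

// Hypothetical runs: candidate scores higher but is slower; baseline had tool errors
const input = {
  runs: {
    baseline: { score: { score: 0.6 }, duration: 1_000, toolErrors: true },
    candidate: { score: { score: 0.9 }, duration: 4_000, toolErrors: false },
  },
}

// Cast hedges over any ComparisonGraderInput fields this sketch omits
const { rankings, reasoning } = await grade(input as unknown as Parameters<typeof grade>[0])
console.log(rankings, reasoning)
```

The same weights can also be supplied without code through `COMPARE_QUALITY`, `COMPARE_LATENCY`, and `COMPARE_RELIABILITY`, which the default `grade` export reads via `getWeightsFromEnv`.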