@smithers-orchestrator/scorers 0.24.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -7
- package/src/__type-tests__/smithersScorers.test-d.ts +49 -0
- package/src/aggregate.js +12 -8
- package/src/index.d.ts +47 -1
- package/src/index.js +7 -2
- package/src/llmJudge.js +19 -3
- package/src/run-scorers.js +0 -0
- package/src/types.ts +4 -0
- package/src/builtins.js +0 -5
- package/src/create-scorer.js +0 -7
- package/src/react-types.ts +0 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@smithers-orchestrator/scorers",
|
|
3
|
-
"version": "0.24.0",
|
|
3
|
+
"version": "0.25.0",
|
|
4
4
|
"description": "Smithers scorer definitions, execution, aggregation, and persistence helpers",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"sideEffects": false,
|
|
@@ -33,12 +33,12 @@
|
|
|
33
33
|
"drizzle-orm": "^0.45.2",
|
|
34
34
|
"effect": "^3.21.1",
|
|
35
35
|
"zod": "^4.3.6",
|
|
36
|
-
"@smithers-orchestrator/agents": "0.24.0",
|
|
37
|
-
"@smithers-orchestrator/
|
|
38
|
-
"@smithers-orchestrator/
|
|
39
|
-
"@smithers-orchestrator/
|
|
40
|
-
"@smithers-orchestrator/
|
|
41
|
-
"@smithers-orchestrator/
|
|
36
|
+
"@smithers-orchestrator/agents": "0.25.0",
|
|
37
|
+
"@smithers-orchestrator/db": "0.25.0",
|
|
38
|
+
"@smithers-orchestrator/errors": "0.25.0",
|
|
39
|
+
"@smithers-orchestrator/observability": "0.25.0",
|
|
40
|
+
"@smithers-orchestrator/graph": "0.25.0",
|
|
41
|
+
"@smithers-orchestrator/scheduler": "0.25.0"
|
|
42
42
|
},
|
|
43
43
|
"devDependencies": {
|
|
44
44
|
"@types/bun": "latest",
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { smithersScorers } from "../index.js";
|
|
2
|
+
|
|
3
|
+
type InsertScorer = typeof smithersScorers.$inferInsert;
|
|
4
|
+
type SelectScorer = typeof smithersScorers.$inferSelect;
|
|
5
|
+
|
|
6
|
+
const insertRow: InsertScorer = {
|
|
7
|
+
id: "score-1",
|
|
8
|
+
runId: "run-1",
|
|
9
|
+
nodeId: "node-1",
|
|
10
|
+
scorerId: "accuracy",
|
|
11
|
+
scorerName: "Accuracy",
|
|
12
|
+
source: "batch",
|
|
13
|
+
score: 0.95,
|
|
14
|
+
scoredAtMs: 1_700_000_000_000,
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
const selectRow: SelectScorer = {
|
|
18
|
+
id: "score-1",
|
|
19
|
+
runId: "run-1",
|
|
20
|
+
nodeId: "node-1",
|
|
21
|
+
iteration: 0,
|
|
22
|
+
attempt: 0,
|
|
23
|
+
scorerId: "accuracy",
|
|
24
|
+
scorerName: "Accuracy",
|
|
25
|
+
source: "batch",
|
|
26
|
+
score: 0.95,
|
|
27
|
+
reason: null,
|
|
28
|
+
metaJson: null,
|
|
29
|
+
inputJson: null,
|
|
30
|
+
outputJson: null,
|
|
31
|
+
groundTruthJson: null,
|
|
32
|
+
contextJson: null,
|
|
33
|
+
latencyMs: null,
|
|
34
|
+
scoredAtMs: 1_700_000_000_000,
|
|
35
|
+
durationMs: null,
|
|
36
|
+
};
|
|
37
|
+
|
|
38
|
+
selectRow.score satisfies number;
|
|
39
|
+
selectRow.runId satisfies string;
|
|
40
|
+
selectRow.reason satisfies string | null;
|
|
41
|
+
|
|
42
|
+
// @ts-expect-error score must remain typed as a number, not erased to any.
|
|
43
|
+
insertRow.score = "0.95";
|
|
44
|
+
|
|
45
|
+
// @ts-expect-error runId must remain typed as a string, not erased to any.
|
|
46
|
+
insertRow.runId = 123;
|
|
47
|
+
|
|
48
|
+
// @ts-expect-error unknown columns must not be accepted.
|
|
49
|
+
insertRow.unknownColumn = "nope";
|
package/src/aggregate.js
CHANGED
|
@@ -18,12 +18,13 @@
|
|
|
18
18
|
*/
|
|
19
19
|
export async function aggregateScores(adapter, opts) {
|
|
20
20
|
const conditions = [];
|
|
21
|
+
const params = [];
|
|
21
22
|
if (opts?.runId)
|
|
22
|
-
conditions
|
|
23
|
+
addFilter(conditions, params, "run_id", opts.runId);
|
|
23
24
|
if (opts?.nodeId)
|
|
24
|
-
conditions
|
|
25
|
+
addFilter(conditions, params, "node_id", opts.nodeId);
|
|
25
26
|
if (opts?.scorerId)
|
|
26
|
-
conditions
|
|
27
|
+
addFilter(conditions, params, "scorer_id", opts.scorerId);
|
|
27
28
|
const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
|
|
28
29
|
// Step 1: Get aggregate stats via SQL
|
|
29
30
|
const aggQuery = `
|
|
@@ -39,7 +40,7 @@ export async function aggregateScores(adapter, opts) {
|
|
|
39
40
|
GROUP BY scorer_id, scorer_name
|
|
40
41
|
ORDER BY scorer_name
|
|
41
42
|
`;
|
|
42
|
-
const aggRows = (await adapter.rawQuery(aggQuery));
|
|
43
|
+
const aggRows = (await adapter.rawQuery(aggQuery, params));
|
|
43
44
|
if (aggRows.length === 0)
|
|
44
45
|
return [];
|
|
45
46
|
// Step 2: Get all scores to compute p50 and stddev per scorer in memory
|
|
@@ -49,7 +50,7 @@ export async function aggregateScores(adapter, opts) {
|
|
|
49
50
|
${where}
|
|
50
51
|
ORDER BY scorer_id, score
|
|
51
52
|
`;
|
|
52
|
-
const allScores = (await adapter.rawQuery(scoresQuery));
|
|
53
|
+
const allScores = (await adapter.rawQuery(scoresQuery, params));
|
|
53
54
|
// Group scores by scorer_id
|
|
54
55
|
const scoresByScorer = new Map();
|
|
55
56
|
for (const row of allScores) {
|
|
@@ -100,9 +101,12 @@ function computeStddev(values, mean) {
|
|
|
100
101
|
return Math.sqrt(variance);
|
|
101
102
|
}
|
|
102
103
|
/**
|
|
104
|
+
* @param {string[]} conditions
|
|
105
|
+
* @param {string[]} params
|
|
106
|
+
* @param {string} column
|
|
103
107
|
* @param {string} value
|
|
104
|
-
* @returns {string}
|
|
105
108
|
*/
|
|
106
|
-
function
|
|
107
|
-
|
|
109
|
+
function addFilter(conditions, params, column, value) {
|
|
110
|
+
conditions.push(`${column} = ?`);
|
|
111
|
+
params.push(value);
|
|
108
112
|
}
|
package/src/index.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ import * as _smithers_agents_AgentLike from '@smithers-orchestrator/agents/Agent
|
|
|
2
2
|
import { AgentLike as AgentLike$3 } from '@smithers-orchestrator/agents/AgentLike';
|
|
3
3
|
import { ZodObject } from 'zod';
|
|
4
4
|
import * as _smithers_db_adapter from '@smithers-orchestrator/db/adapter';
|
|
5
|
+
import * as drizzle_orm_sqlite_core from 'drizzle-orm/sqlite-core';
|
|
5
6
|
import * as effect_MetricState from 'effect/MetricState';
|
|
6
7
|
import * as effect_MetricKeyType from 'effect/MetricKeyType';
|
|
7
8
|
import { Metric } from 'effect';
|
|
@@ -74,6 +75,8 @@ type ScoreRow$1 = {
|
|
|
74
75
|
metaJson: string | null;
|
|
75
76
|
inputJson: string | null;
|
|
76
77
|
outputJson: string | null;
|
|
78
|
+
groundTruthJson: string | null;
|
|
79
|
+
contextJson: string | null;
|
|
77
80
|
latencyMs: number | null;
|
|
78
81
|
scoredAtMs: number;
|
|
79
82
|
durationMs: number | null;
|
|
@@ -97,6 +100,8 @@ type ScorerContext$2 = {
|
|
|
97
100
|
attempt: number;
|
|
98
101
|
input: unknown;
|
|
99
102
|
output: unknown;
|
|
103
|
+
groundTruth?: unknown;
|
|
104
|
+
context?: unknown;
|
|
100
105
|
latencyMs?: number;
|
|
101
106
|
outputSchema?: ZodObject;
|
|
102
107
|
};
|
|
@@ -156,7 +161,48 @@ type SmithersDb$1 = _smithers_db_adapter.SmithersDb;
|
|
|
156
161
|
* Drizzle table definition for the `_smithers_scorers` table.
|
|
157
162
|
* Stores individual scorer results for each task execution.
|
|
158
163
|
*/
|
|
159
|
-
|
|
164
|
+
type SmithersScorerColumn<Name extends string, Data, NotNull extends boolean, HasDefault extends boolean, PrimaryKey extends boolean, ColumnType extends string, DataType extends "string" | "number"> = drizzle_orm_sqlite_core.SQLiteColumn<{
|
|
165
|
+
name: Name;
|
|
166
|
+
tableName: "_smithers_scorers";
|
|
167
|
+
dataType: DataType;
|
|
168
|
+
columnType: ColumnType;
|
|
169
|
+
data: Data;
|
|
170
|
+
driverParam: Data;
|
|
171
|
+
notNull: NotNull;
|
|
172
|
+
hasDefault: HasDefault;
|
|
173
|
+
isPrimaryKey: PrimaryKey;
|
|
174
|
+
isAutoincrement: false;
|
|
175
|
+
hasRuntimeDefault: false;
|
|
176
|
+
enumValues: DataType extends "string" ? [string, ...string[]] : undefined;
|
|
177
|
+
baseColumn: never;
|
|
178
|
+
identity: undefined;
|
|
179
|
+
generated: undefined;
|
|
180
|
+
}, {}, {}>;
|
|
181
|
+
declare const smithersScorers: drizzle_orm_sqlite_core.SQLiteTableWithColumns<{
|
|
182
|
+
name: "_smithers_scorers";
|
|
183
|
+
schema: undefined;
|
|
184
|
+
columns: {
|
|
185
|
+
id: SmithersScorerColumn<"id", string, true, false, true, "SQLiteText", "string">;
|
|
186
|
+
runId: SmithersScorerColumn<"run_id", string, true, false, false, "SQLiteText", "string">;
|
|
187
|
+
nodeId: SmithersScorerColumn<"node_id", string, true, false, false, "SQLiteText", "string">;
|
|
188
|
+
iteration: SmithersScorerColumn<"iteration", number, true, true, false, "SQLiteInteger", "number">;
|
|
189
|
+
attempt: SmithersScorerColumn<"attempt", number, true, true, false, "SQLiteInteger", "number">;
|
|
190
|
+
scorerId: SmithersScorerColumn<"scorer_id", string, true, false, false, "SQLiteText", "string">;
|
|
191
|
+
scorerName: SmithersScorerColumn<"scorer_name", string, true, false, false, "SQLiteText", "string">;
|
|
192
|
+
source: SmithersScorerColumn<"source", string, true, false, false, "SQLiteText", "string">;
|
|
193
|
+
score: SmithersScorerColumn<"score", number, true, false, false, "SQLiteReal", "number">;
|
|
194
|
+
reason: SmithersScorerColumn<"reason", string, false, false, false, "SQLiteText", "string">;
|
|
195
|
+
metaJson: SmithersScorerColumn<"meta_json", string, false, false, false, "SQLiteText", "string">;
|
|
196
|
+
inputJson: SmithersScorerColumn<"input_json", string, false, false, false, "SQLiteText", "string">;
|
|
197
|
+
outputJson: SmithersScorerColumn<"output_json", string, false, false, false, "SQLiteText", "string">;
|
|
198
|
+
groundTruthJson: SmithersScorerColumn<"ground_truth_json", string, false, false, false, "SQLiteText", "string">;
|
|
199
|
+
contextJson: SmithersScorerColumn<"context_json", string, false, false, false, "SQLiteText", "string">;
|
|
200
|
+
latencyMs: SmithersScorerColumn<"latency_ms", number, false, false, false, "SQLiteReal", "number">;
|
|
201
|
+
scoredAtMs: SmithersScorerColumn<"scored_at_ms", number, true, false, false, "SQLiteInteger", "number">;
|
|
202
|
+
durationMs: SmithersScorerColumn<"duration_ms", number, false, false, false, "SQLiteReal", "number">;
|
|
203
|
+
};
|
|
204
|
+
dialect: "sqlite";
|
|
205
|
+
}>;
|
|
160
206
|
|
|
161
207
|
/** @typedef {import("./CreateScorerConfig.js").CreateScorerConfig} CreateScorerConfig */
|
|
162
208
|
/** @typedef {import("./types.js").Scorer} Scorer */
|
package/src/index.js
CHANGED
|
@@ -15,9 +15,14 @@
|
|
|
15
15
|
// @smithers-type-exports-end
|
|
16
16
|
|
|
17
17
|
// Factories
|
|
18
|
-
export { createScorer, llmJudge } from "./create-scorer.js";
|
|
18
|
+
export { createScorer } from "./createScorer.js";
|
|
19
|
+
export { llmJudge } from "./llmJudge.js";
|
|
19
20
|
// Built-in scorers
|
|
20
|
-
export { relevancyScorer, toxicityScorer, faithfulnessScorer, schemaAdherenceScorer, latencyScorer } from "./builtins.js";
|
|
21
|
+
export { relevancyScorer } from "./relevancyScorer.js";
|
|
22
|
+
export { toxicityScorer } from "./toxicityScorer.js";
|
|
23
|
+
export { faithfulnessScorer } from "./faithfulnessScorer.js";
|
|
24
|
+
export { schemaAdherenceScorer } from "./schemaAdherenceScorer.js";
|
|
25
|
+
export { latencyScorer } from "./latencyScorer.js";
|
|
21
26
|
// Execution
|
|
22
27
|
export { runScorersAsync, runScorersBatch } from "./run-scorers.js";
|
|
23
28
|
// Aggregation
|
package/src/llmJudge.js
CHANGED
|
@@ -12,12 +12,19 @@
|
|
|
12
12
|
* in the `reason`) do not prematurely close the object.
|
|
13
13
|
*
|
|
14
14
|
* @param {string} text
|
|
15
|
-
* @returns {Record<string, unknown> | undefined}
|
|
15
|
+
* @returns {Record<string, unknown> | unknown[] | number | undefined}
|
|
16
16
|
*/
|
|
17
17
|
function parseJudgeJson(text) {
|
|
18
18
|
const trimmed = text.trim();
|
|
19
19
|
try {
|
|
20
|
-
|
|
20
|
+
const parsed = JSON.parse(trimmed);
|
|
21
|
+
if (typeof parsed === "number") {
|
|
22
|
+
return parsed;
|
|
23
|
+
}
|
|
24
|
+
if (parsed && typeof parsed === "object") {
|
|
25
|
+
return parsed;
|
|
26
|
+
}
|
|
27
|
+
return undefined;
|
|
21
28
|
}
|
|
22
29
|
catch {
|
|
23
30
|
// fall through to balanced-brace extraction
|
|
@@ -108,7 +115,16 @@ export function llmJudge(config) {
|
|
|
108
115
|
// text, then fall back to the outermost balanced-brace object so that a
|
|
109
116
|
// brace inside the judge's `reason` string does not truncate the match.
|
|
110
117
|
const parsed = parseJudgeJson(text);
|
|
111
|
-
if (
|
|
118
|
+
if (typeof parsed === "number") {
|
|
119
|
+
return {
|
|
120
|
+
score: Number.isFinite(parsed)
|
|
121
|
+
? Math.max(0, Math.min(1, parsed))
|
|
122
|
+
: 0,
|
|
123
|
+
reason: undefined,
|
|
124
|
+
meta: { raw: text },
|
|
125
|
+
};
|
|
126
|
+
}
|
|
127
|
+
if (parsed) {
|
|
112
128
|
const rawScore = Number(parsed.score);
|
|
113
129
|
return {
|
|
114
130
|
score: Number.isFinite(rawScore)
|
package/src/run-scorers.js
CHANGED
|
Binary file
|
package/src/types.ts
CHANGED
|
@@ -83,6 +83,8 @@ export type ScoreRow = {
|
|
|
83
83
|
metaJson: string | null;
|
|
84
84
|
inputJson: string | null;
|
|
85
85
|
outputJson: string | null;
|
|
86
|
+
groundTruthJson: string | null;
|
|
87
|
+
contextJson: string | null;
|
|
86
88
|
latencyMs: number | null;
|
|
87
89
|
scoredAtMs: number;
|
|
88
90
|
durationMs: number | null;
|
|
@@ -112,6 +114,8 @@ export type ScorerContext = {
|
|
|
112
114
|
attempt: number;
|
|
113
115
|
input: unknown;
|
|
114
116
|
output: unknown;
|
|
117
|
+
groundTruth?: unknown;
|
|
118
|
+
context?: unknown;
|
|
115
119
|
latencyMs?: number;
|
|
116
120
|
outputSchema?: ZodObject;
|
|
117
121
|
};
|
package/src/builtins.js
DELETED
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
export { relevancyScorer } from "./relevancyScorer.js";
|
|
2
|
-
export { toxicityScorer } from "./toxicityScorer.js";
|
|
3
|
-
export { faithfulnessScorer } from "./faithfulnessScorer.js";
|
|
4
|
-
export { schemaAdherenceScorer } from "./schemaAdherenceScorer.js";
|
|
5
|
-
export { latencyScorer } from "./latencyScorer.js";
|
package/src/create-scorer.js
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
// @smithers-type-exports-begin
|
|
2
|
-
/** @typedef {import("./CreateScorerConfig.js").CreateScorerConfig} CreateScorerConfig */
|
|
3
|
-
/** @typedef {import("./LlmJudgeConfig.js").LlmJudgeConfig} LlmJudgeConfig */
|
|
4
|
-
// @smithers-type-exports-end
|
|
5
|
-
|
|
6
|
-
export { createScorer } from "./createScorer.js";
|
|
7
|
-
export { llmJudge } from "./llmJudge.js";
|
package/src/react-types.ts
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export type { ScorersMap } from "@smithers-orchestrator/graph/types";
|