@smithers-orchestrator/scorers 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@smithers-orchestrator/scorers",
3
- "version": "0.24.0",
3
+ "version": "0.25.0",
4
4
  "description": "Smithers scorer definitions, execution, aggregation, and persistence helpers",
5
5
  "type": "module",
6
6
  "sideEffects": false,
@@ -33,12 +33,12 @@
33
33
  "drizzle-orm": "^0.45.2",
34
34
  "effect": "^3.21.1",
35
35
  "zod": "^4.3.6",
36
- "@smithers-orchestrator/agents": "0.24.0",
37
- "@smithers-orchestrator/errors": "0.24.0",
38
- "@smithers-orchestrator/observability": "0.24.0",
39
- "@smithers-orchestrator/db": "0.24.0",
40
- "@smithers-orchestrator/scheduler": "0.24.0",
41
- "@smithers-orchestrator/graph": "0.24.0"
36
+ "@smithers-orchestrator/agents": "0.25.0",
37
+ "@smithers-orchestrator/db": "0.25.0",
38
+ "@smithers-orchestrator/errors": "0.25.0",
39
+ "@smithers-orchestrator/observability": "0.25.0",
40
+ "@smithers-orchestrator/graph": "0.25.0",
41
+ "@smithers-orchestrator/scheduler": "0.25.0"
42
42
  },
43
43
  "devDependencies": {
44
44
  "@types/bun": "latest",
@@ -0,0 +1,49 @@
1
+ import { smithersScorers } from "../index.js";
2
+
3
+ type InsertScorer = typeof smithersScorers.$inferInsert;
4
+ type SelectScorer = typeof smithersScorers.$inferSelect;
5
+
6
+ const insertRow: InsertScorer = {
7
+ id: "score-1",
8
+ runId: "run-1",
9
+ nodeId: "node-1",
10
+ scorerId: "accuracy",
11
+ scorerName: "Accuracy",
12
+ source: "batch",
13
+ score: 0.95,
14
+ scoredAtMs: 1_700_000_000_000,
15
+ };
16
+
17
+ const selectRow: SelectScorer = {
18
+ id: "score-1",
19
+ runId: "run-1",
20
+ nodeId: "node-1",
21
+ iteration: 0,
22
+ attempt: 0,
23
+ scorerId: "accuracy",
24
+ scorerName: "Accuracy",
25
+ source: "batch",
26
+ score: 0.95,
27
+ reason: null,
28
+ metaJson: null,
29
+ inputJson: null,
30
+ outputJson: null,
31
+ groundTruthJson: null,
32
+ contextJson: null,
33
+ latencyMs: null,
34
+ scoredAtMs: 1_700_000_000_000,
35
+ durationMs: null,
36
+ };
37
+
38
+ selectRow.score satisfies number;
39
+ selectRow.runId satisfies string;
40
+ selectRow.reason satisfies string | null;
41
+
42
+ // @ts-expect-error score must remain typed as a number, not erased to any.
43
+ insertRow.score = "0.95";
44
+
45
+ // @ts-expect-error runId must remain typed as a string, not erased to any.
46
+ insertRow.runId = 123;
47
+
48
+ // @ts-expect-error unknown columns must not be accepted.
49
+ insertRow.unknownColumn = "nope";
package/src/aggregate.js CHANGED
@@ -18,12 +18,13 @@
18
18
  */
19
19
  export async function aggregateScores(adapter, opts) {
20
20
  const conditions = [];
21
+ const params = [];
21
22
  if (opts?.runId)
22
- conditions.push(`run_id = '${escapeSql(opts.runId)}'`);
23
+ addFilter(conditions, params, "run_id", opts.runId);
23
24
  if (opts?.nodeId)
24
- conditions.push(`node_id = '${escapeSql(opts.nodeId)}'`);
25
+ addFilter(conditions, params, "node_id", opts.nodeId);
25
26
  if (opts?.scorerId)
26
- conditions.push(`scorer_id = '${escapeSql(opts.scorerId)}'`);
27
+ addFilter(conditions, params, "scorer_id", opts.scorerId);
27
28
  const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
28
29
  // Step 1: Get aggregate stats via SQL
29
30
  const aggQuery = `
@@ -39,7 +40,7 @@ export async function aggregateScores(adapter, opts) {
39
40
  GROUP BY scorer_id, scorer_name
40
41
  ORDER BY scorer_name
41
42
  `;
42
- const aggRows = (await adapter.rawQuery(aggQuery));
43
+ const aggRows = (await adapter.rawQuery(aggQuery, params));
43
44
  if (aggRows.length === 0)
44
45
  return [];
45
46
  // Step 2: Get all scores to compute p50 and stddev per scorer in memory
@@ -49,7 +50,7 @@ export async function aggregateScores(adapter, opts) {
49
50
  ${where}
50
51
  ORDER BY scorer_id, score
51
52
  `;
52
- const allScores = (await adapter.rawQuery(scoresQuery));
53
+ const allScores = (await adapter.rawQuery(scoresQuery, params));
53
54
  // Group scores by scorer_id
54
55
  const scoresByScorer = new Map();
55
56
  for (const row of allScores) {
@@ -100,9 +101,12 @@ function computeStddev(values, mean) {
100
101
  return Math.sqrt(variance);
101
102
  }
102
103
  /**
104
+ * @param {string[]} conditions
105
+ * @param {string[]} params
106
+ * @param {string} column
103
107
  * @param {string} value
104
- * @returns {string}
105
108
  */
106
- function escapeSql(value) {
107
- return value.replace(/'/g, "''");
109
+ function addFilter(conditions, params, column, value) {
110
+ conditions.push(`${column} = ?`);
111
+ params.push(value);
108
112
  }
package/src/index.d.ts CHANGED
@@ -2,6 +2,7 @@ import * as _smithers_agents_AgentLike from '@smithers-orchestrator/agents/Agent
2
2
  import { AgentLike as AgentLike$3 } from '@smithers-orchestrator/agents/AgentLike';
3
3
  import { ZodObject } from 'zod';
4
4
  import * as _smithers_db_adapter from '@smithers-orchestrator/db/adapter';
5
+ import * as drizzle_orm_sqlite_core from 'drizzle-orm/sqlite-core';
5
6
  import * as effect_MetricState from 'effect/MetricState';
6
7
  import * as effect_MetricKeyType from 'effect/MetricKeyType';
7
8
  import { Metric } from 'effect';
@@ -74,6 +75,8 @@ type ScoreRow$1 = {
74
75
  metaJson: string | null;
75
76
  inputJson: string | null;
76
77
  outputJson: string | null;
78
+ groundTruthJson: string | null;
79
+ contextJson: string | null;
77
80
  latencyMs: number | null;
78
81
  scoredAtMs: number;
79
82
  durationMs: number | null;
@@ -97,6 +100,8 @@ type ScorerContext$2 = {
97
100
  attempt: number;
98
101
  input: unknown;
99
102
  output: unknown;
103
+ groundTruth?: unknown;
104
+ context?: unknown;
100
105
  latencyMs?: number;
101
106
  outputSchema?: ZodObject;
102
107
  };
@@ -156,7 +161,48 @@ type SmithersDb$1 = _smithers_db_adapter.SmithersDb;
156
161
  * Drizzle table definition for the `_smithers_scorers` table.
157
162
  * Stores individual scorer results for each task execution.
158
163
  */
159
- declare const smithersScorers: any;
164
+ type SmithersScorerColumn<Name extends string, Data, NotNull extends boolean, HasDefault extends boolean, PrimaryKey extends boolean, ColumnType extends string, DataType extends "string" | "number"> = drizzle_orm_sqlite_core.SQLiteColumn<{
165
+ name: Name;
166
+ tableName: "_smithers_scorers";
167
+ dataType: DataType;
168
+ columnType: ColumnType;
169
+ data: Data;
170
+ driverParam: Data;
171
+ notNull: NotNull;
172
+ hasDefault: HasDefault;
173
+ isPrimaryKey: PrimaryKey;
174
+ isAutoincrement: false;
175
+ hasRuntimeDefault: false;
176
+ enumValues: DataType extends "string" ? [string, ...string[]] : undefined;
177
+ baseColumn: never;
178
+ identity: undefined;
179
+ generated: undefined;
180
+ }, {}, {}>;
181
+ declare const smithersScorers: drizzle_orm_sqlite_core.SQLiteTableWithColumns<{
182
+ name: "_smithers_scorers";
183
+ schema: undefined;
184
+ columns: {
185
+ id: SmithersScorerColumn<"id", string, true, false, true, "SQLiteText", "string">;
186
+ runId: SmithersScorerColumn<"run_id", string, true, false, false, "SQLiteText", "string">;
187
+ nodeId: SmithersScorerColumn<"node_id", string, true, false, false, "SQLiteText", "string">;
188
+ iteration: SmithersScorerColumn<"iteration", number, true, true, false, "SQLiteInteger", "number">;
189
+ attempt: SmithersScorerColumn<"attempt", number, true, true, false, "SQLiteInteger", "number">;
190
+ scorerId: SmithersScorerColumn<"scorer_id", string, true, false, false, "SQLiteText", "string">;
191
+ scorerName: SmithersScorerColumn<"scorer_name", string, true, false, false, "SQLiteText", "string">;
192
+ source: SmithersScorerColumn<"source", string, true, false, false, "SQLiteText", "string">;
193
+ score: SmithersScorerColumn<"score", number, true, false, false, "SQLiteReal", "number">;
194
+ reason: SmithersScorerColumn<"reason", string, false, false, false, "SQLiteText", "string">;
195
+ metaJson: SmithersScorerColumn<"meta_json", string, false, false, false, "SQLiteText", "string">;
196
+ inputJson: SmithersScorerColumn<"input_json", string, false, false, false, "SQLiteText", "string">;
197
+ outputJson: SmithersScorerColumn<"output_json", string, false, false, false, "SQLiteText", "string">;
198
+ groundTruthJson: SmithersScorerColumn<"ground_truth_json", string, false, false, false, "SQLiteText", "string">;
199
+ contextJson: SmithersScorerColumn<"context_json", string, false, false, false, "SQLiteText", "string">;
200
+ latencyMs: SmithersScorerColumn<"latency_ms", number, false, false, false, "SQLiteReal", "number">;
201
+ scoredAtMs: SmithersScorerColumn<"scored_at_ms", number, true, false, false, "SQLiteInteger", "number">;
202
+ durationMs: SmithersScorerColumn<"duration_ms", number, false, false, false, "SQLiteReal", "number">;
203
+ };
204
+ dialect: "sqlite";
205
+ }>;
160
206
 
161
207
  /** @typedef {import("./CreateScorerConfig.js").CreateScorerConfig} CreateScorerConfig */
162
208
  /** @typedef {import("./types.js").Scorer} Scorer */
package/src/index.js CHANGED
@@ -15,9 +15,14 @@
15
15
  // @smithers-type-exports-end
16
16
 
17
17
  // Factories
18
- export { createScorer, llmJudge } from "./create-scorer.js";
18
+ export { createScorer } from "./createScorer.js";
19
+ export { llmJudge } from "./llmJudge.js";
19
20
  // Built-in scorers
20
- export { relevancyScorer, toxicityScorer, faithfulnessScorer, schemaAdherenceScorer, latencyScorer, } from "./builtins.js";
21
+ export { relevancyScorer } from "./relevancyScorer.js";
22
+ export { toxicityScorer } from "./toxicityScorer.js";
23
+ export { faithfulnessScorer } from "./faithfulnessScorer.js";
24
+ export { schemaAdherenceScorer } from "./schemaAdherenceScorer.js";
25
+ export { latencyScorer } from "./latencyScorer.js";
21
26
  // Execution
22
27
  export { runScorersAsync, runScorersBatch } from "./run-scorers.js";
23
28
  // Aggregation
package/src/llmJudge.js CHANGED
@@ -12,12 +12,19 @@
12
12
  * in the `reason`) do not prematurely close the object.
13
13
  *
14
14
  * @param {string} text
15
- * @returns {Record<string, unknown> | undefined}
15
+ * @returns {Record<string, unknown> | unknown[] | number | undefined}
16
16
  */
17
17
  function parseJudgeJson(text) {
18
18
  const trimmed = text.trim();
19
19
  try {
20
- return JSON.parse(trimmed);
20
+ const parsed = JSON.parse(trimmed);
21
+ if (typeof parsed === "number") {
22
+ return parsed;
23
+ }
24
+ if (parsed && typeof parsed === "object") {
25
+ return parsed;
26
+ }
27
+ return undefined;
21
28
  }
22
29
  catch {
23
30
  // fall through to balanced-brace extraction
@@ -108,7 +115,16 @@ export function llmJudge(config) {
108
115
  // text, then fall back to the outermost balanced-brace object so that a
109
116
  // brace inside the judge's `reason` string does not truncate the match.
110
117
  const parsed = parseJudgeJson(text);
111
- if (parsed && typeof parsed === "object") {
118
+ if (typeof parsed === "number") {
119
+ return {
120
+ score: Number.isFinite(parsed)
121
+ ? Math.max(0, Math.min(1, parsed))
122
+ : 0,
123
+ reason: undefined,
124
+ meta: { raw: text },
125
+ };
126
+ }
127
+ if (parsed) {
112
128
  const rawScore = Number(parsed.score);
113
129
  return {
114
130
  score: Number.isFinite(rawScore)
Binary file
package/src/types.ts CHANGED
@@ -83,6 +83,8 @@ export type ScoreRow = {
83
83
  metaJson: string | null;
84
84
  inputJson: string | null;
85
85
  outputJson: string | null;
86
+ groundTruthJson: string | null;
87
+ contextJson: string | null;
86
88
  latencyMs: number | null;
87
89
  scoredAtMs: number;
88
90
  durationMs: number | null;
@@ -112,6 +114,8 @@ export type ScorerContext = {
112
114
  attempt: number;
113
115
  input: unknown;
114
116
  output: unknown;
117
+ groundTruth?: unknown;
118
+ context?: unknown;
115
119
  latencyMs?: number;
116
120
  outputSchema?: ZodObject;
117
121
  };
package/src/builtins.js DELETED
@@ -1,5 +0,0 @@
1
- export { relevancyScorer } from "./relevancyScorer.js";
2
- export { toxicityScorer } from "./toxicityScorer.js";
3
- export { faithfulnessScorer } from "./faithfulnessScorer.js";
4
- export { schemaAdherenceScorer } from "./schemaAdherenceScorer.js";
5
- export { latencyScorer } from "./latencyScorer.js";
@@ -1,7 +0,0 @@
1
- // @smithers-type-exports-begin
2
- /** @typedef {import("./CreateScorerConfig.js").CreateScorerConfig} CreateScorerConfig */
3
- /** @typedef {import("./LlmJudgeConfig.js").LlmJudgeConfig} LlmJudgeConfig */
4
- // @smithers-type-exports-end
5
-
6
- export { createScorer } from "./createScorer.js";
7
- export { llmJudge } from "./llmJudge.js";
@@ -1 +0,0 @@
1
- export type { ScorersMap } from "@smithers-orchestrator/graph/types";