@smithers-orchestrator/scorers 0.21.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +7 -7
- package/src/llmJudge.js +76 -17
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@smithers-orchestrator/scorers",
|
|
3
|
-
"version": "0.21.0",
|
|
3
|
+
"version": "0.22.0",
|
|
4
4
|
"description": "Smithers scorer definitions, execution, aggregation, and persistence helpers",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"sideEffects": false,
|
|
@@ -33,12 +33,12 @@
|
|
|
33
33
|
"drizzle-orm": "^0.45.2",
|
|
34
34
|
"effect": "^3.21.1",
|
|
35
35
|
"zod": "^4.3.6",
|
|
36
|
-
"@smithers-orchestrator/agents": "0.
|
|
37
|
-
"@smithers-orchestrator/graph": "0.
|
|
38
|
-
"@smithers-orchestrator/
|
|
39
|
-
"@smithers-orchestrator/
|
|
40
|
-
"@smithers-orchestrator/observability": "0.
|
|
41
|
-
"@smithers-orchestrator/scheduler": "0.
|
|
36
|
+
"@smithers-orchestrator/agents": "0.22.0",
|
|
37
|
+
"@smithers-orchestrator/graph": "0.22.0",
|
|
38
|
+
"@smithers-orchestrator/db": "0.22.0",
|
|
39
|
+
"@smithers-orchestrator/errors": "0.22.0",
|
|
40
|
+
"@smithers-orchestrator/observability": "0.22.0",
|
|
41
|
+
"@smithers-orchestrator/scheduler": "0.22.0"
|
|
42
42
|
},
|
|
43
43
|
"devDependencies": {
|
|
44
44
|
"@types/bun": "latest",
|
package/src/llmJudge.js
CHANGED
|
@@ -3,6 +3,69 @@
|
|
|
3
3
|
/** @typedef {import("./types.js").ScorerInput} ScorerInput */
|
|
4
4
|
/** @typedef {import("./types.js").ScoreResult} ScoreResult */
|
|
5
5
|
|
|
6
|
+
/**
 * Finds the index of the `}` that closes the `{` located at `start`.
 *
 * Walks forward tracking brace depth. Double-quoted string literals and
 * backslash escapes inside them are honoured, so braces that appear inside
 * string values do not affect the depth count.
 *
 * @param {string} source Text to scan.
 * @param {number} start Index of the opening `{`.
 * @returns {number} Index of the matching `}`, or -1 if the braces never balance.
 */
function findBalancedObjectEnd(source, start) {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < source.length; i++) {
        const char = source[i];
        if (inString) {
            if (escaped) {
                // The character after a backslash is consumed verbatim.
                escaped = false;
            }
            else if (char === "\\") {
                escaped = true;
            }
            else if (char === '"') {
                inString = false;
            }
            continue;
        }
        if (char === '"') {
            inString = true;
        }
        else if (char === "{") {
            depth++;
        }
        else if (char === "}") {
            depth--;
            if (depth === 0) {
                return i;
            }
        }
    }
    return -1;
}

/**
 * Extracts and parses a JSON object from a judge's free-form text response.
 *
 * First tries `JSON.parse` on the trimmed text and accepts the result only if
 * it is a plain object (not `null`, an array, or a primitive), matching the
 * documented return type. Otherwise scans for `{` candidates and, for each,
 * walks forward tracking brace depth while respecting string literals and
 * escape sequences, so braces inside string values (e.g. a brace in the
 * `reason`) do not prematurely close the object. A candidate that fails to
 * parse, or that never closes, does not abort the search: scanning resumes at
 * a later `{`, so stray braces in the surrounding prose do not hide a valid
 * object further on.
 *
 * @param {string} text
 * @returns {Record<string, unknown> | undefined} The first parseable JSON
 *   object found, or `undefined` when none is found.
 */
function parseJudgeJson(text) {
    const trimmed = text.trim();
    try {
        const whole = JSON.parse(trimmed);
        if (whole !== null && typeof whole === "object" && !Array.isArray(whole)) {
            return whole;
        }
        // Valid JSON but not an object (number, string, array, null): fall
        // through and look for an embedded object instead.
    }
    catch {
        // fall through to balanced-brace extraction
    }
    let start = trimmed.indexOf("{");
    while (start !== -1) {
        const end = findBalancedObjectEnd(trimmed, start);
        if (end === -1) {
            // This brace never closes; a later one still might.
            start = trimmed.indexOf("{", start + 1);
            continue;
        }
        try {
            return JSON.parse(trimmed.slice(start, end + 1));
        }
        catch {
            // Balanced but not JSON (e.g. "{curly}" in prose). Resume after it
            // rather than inside it, so fragments of a malformed object are
            // not mistaken for the judge's answer.
            start = trimmed.indexOf("{", end + 1);
        }
    }
    return undefined;
}
|
|
68
|
+
|
|
6
69
|
/**
|
|
7
70
|
* Creates an LLM-as-judge scorer that delegates evaluation to an AI agent.
|
|
8
71
|
*
|
|
@@ -41,23 +104,19 @@ export function llmJudge(config) {
|
|
|
41
104
|
: typeof response?.text === "string"
|
|
42
105
|
? response.text
|
|
43
106
|
: JSON.stringify(response);
|
|
44
|
-
// Try to parse JSON from the response
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
}
|
|
58
|
-
catch {
|
|
59
|
-
// fall through to default
|
|
60
|
-
}
|
|
107
|
+
// Try to parse JSON from the response. First attempt the whole trimmed
|
|
108
|
+
// text, then fall back to the outermost balanced-brace object so that a
|
|
109
|
+
// brace inside the judge's `reason` string does not truncate the match.
|
|
110
|
+
const parsed = parseJudgeJson(text);
|
|
111
|
+
if (parsed && typeof parsed === "object") {
|
|
112
|
+
const rawScore = Number(parsed.score);
|
|
113
|
+
return {
|
|
114
|
+
score: Number.isFinite(rawScore)
|
|
115
|
+
? Math.max(0, Math.min(1, rawScore))
|
|
116
|
+
: 0,
|
|
117
|
+
reason: typeof parsed.reason === "string" ? parsed.reason : undefined,
|
|
118
|
+
meta: { raw: text },
|
|
119
|
+
};
|
|
61
120
|
}
|
|
62
121
|
// If we can't parse JSON, return a low-confidence score
|
|
63
122
|
return {
|