@smithers-orchestrator/scorers 0.20.4 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +7 -7
  2. package/src/llmJudge.js +76 -17
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@smithers-orchestrator/scorers",
3
- "version": "0.20.4",
3
+ "version": "0.22.0",
4
4
  "description": "Smithers scorer definitions, execution, aggregation, and persistence helpers",
5
5
  "type": "module",
6
6
  "sideEffects": false,
@@ -33,12 +33,12 @@
33
33
  "drizzle-orm": "^0.45.2",
34
34
  "effect": "^3.21.1",
35
35
  "zod": "^4.3.6",
36
- "@smithers-orchestrator/db": "0.20.4",
37
- "@smithers-orchestrator/agents": "0.20.4",
38
- "@smithers-orchestrator/errors": "0.20.4",
39
- "@smithers-orchestrator/observability": "0.20.4",
40
- "@smithers-orchestrator/scheduler": "0.20.4",
41
- "@smithers-orchestrator/graph": "0.20.4"
36
+ "@smithers-orchestrator/agents": "0.22.0",
37
+ "@smithers-orchestrator/graph": "0.22.0",
38
+ "@smithers-orchestrator/db": "0.22.0",
39
+ "@smithers-orchestrator/errors": "0.22.0",
40
+ "@smithers-orchestrator/observability": "0.22.0",
41
+ "@smithers-orchestrator/scheduler": "0.22.0"
42
42
  },
43
43
  "devDependencies": {
44
44
  "@types/bun": "latest",
package/src/llmJudge.js CHANGED
@@ -3,6 +3,69 @@
3
3
  /** @typedef {import("./types.js").ScorerInput} ScorerInput */
4
4
  /** @typedef {import("./types.js").ScoreResult} ScoreResult */
5
5
 
6
/**
 * Parses `candidate` as JSON and returns it only when it is a plain JSON
 * object (not `null`, not an array, not a primitive).
 *
 * @param {string} candidate
 * @returns {Record<string, unknown> | undefined}
 */
function tryParseJsonObject(candidate) {
    try {
        const value = JSON.parse(candidate);
        const isPlainObject = value !== null && typeof value === "object" && !Array.isArray(value);
        return isPlainObject ? value : undefined;
    } catch {
        // Invalid JSON is an expected outcome for free-form judge text; the
        // caller decides what to try next.
        return undefined;
    }
}

/**
 * Finds the index of the `}` that closes the `{` located at `start`.
 *
 * Brace depth is tracked only outside of double-quoted string literals, and
 * backslash escapes inside strings are honoured, so `}` or `\"` inside a
 * string value never closes the object early.
 *
 * @param {string} source
 * @param {number} start Index of an opening `{` in `source`.
 * @returns {number} Index of the matching `}`, or -1 when the braces never balance.
 */
function findBalancedObjectEnd(source, start) {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < source.length; i++) {
        const char = source[i];
        if (inString) {
            if (escaped) {
                // The character after a backslash is consumed verbatim.
                escaped = false;
            } else if (char === "\\") {
                escaped = true;
            } else if (char === '"') {
                inString = false;
            }
            continue;
        }
        if (char === '"') {
            inString = true;
        } else if (char === "{") {
            depth++;
        } else if (char === "}") {
            depth--;
            if (depth === 0) {
                return i;
            }
        }
    }
    return -1;
}

/**
 * Extracts and parses a JSON object from a judge's free-form text response.
 *
 * First tries `JSON.parse` on the trimmed text. If that does not produce a
 * plain object, every `{` in the text is tried in order as the start of a
 * balanced-brace span (respecting string literals and escape sequences, so a
 * brace inside e.g. the `reason` string does not prematurely close the
 * object). The first span that parses to a plain object wins, which lets the
 * parser skip over brace-delimited prose such as `{placeholder}` that precedes
 * the real JSON, and unwrap an object that the judge wrapped in an array.
 *
 * Worst case this is quadratic in the number of `{` characters, because each
 * failed candidate start triggers a fresh forward scan.
 *
 * @param {string} text
 * @returns {Record<string, unknown> | undefined} The parsed object, or
 *   `undefined` when `text` is not a string or contains no parseable object.
 */
function parseJudgeJson(text) {
    if (typeof text !== "string") {
        return undefined;
    }
    const trimmed = text.trim();
    const whole = tryParseJsonObject(trimmed);
    if (whole !== undefined) {
        return whole;
    }
    for (let start = trimmed.indexOf("{"); start !== -1; start = trimmed.indexOf("{", start + 1)) {
        const end = findBalancedObjectEnd(trimmed, start);
        if (end === -1) {
            // Unbalanced from this `{`; a later `{` may still open a complete object.
            continue;
        }
        const candidate = tryParseJsonObject(trimmed.slice(start, end + 1));
        if (candidate !== undefined) {
            return candidate;
        }
    }
    return undefined;
}
68
+
6
69
  /**
7
70
  * Creates an LLM-as-judge scorer that delegates evaluation to an AI agent.
8
71
  *
@@ -41,23 +104,19 @@ export function llmJudge(config) {
41
104
  : typeof response?.text === "string"
42
105
  ? response.text
43
106
  : JSON.stringify(response);
44
- // Try to parse JSON from the response
45
- const jsonMatch = text.match(/\{[\s\S]*?"score"\s*:\s*[\d.]+[\s\S]*?\}/);
46
- if (jsonMatch) {
47
- try {
48
- const parsed = JSON.parse(jsonMatch[0]);
49
- const rawScore = Number(parsed.score);
50
- return {
51
- score: Number.isFinite(rawScore)
52
- ? Math.max(0, Math.min(1, rawScore))
53
- : 0,
54
- reason: typeof parsed.reason === "string" ? parsed.reason : undefined,
55
- meta: { raw: text },
56
- };
57
- }
58
- catch {
59
- // fall through to default
60
- }
107
+ // Try to parse JSON from the response. First attempt the whole trimmed
108
+ // text, then fall back to the outermost balanced-brace object so that a
109
+ // brace inside the judge's `reason` string does not truncate the match.
110
+ const parsed = parseJudgeJson(text);
111
+ if (parsed && typeof parsed === "object") {
112
+ const rawScore = Number(parsed.score);
113
+ return {
114
+ score: Number.isFinite(rawScore)
115
+ ? Math.max(0, Math.min(1, rawScore))
116
+ : 0,
117
+ reason: typeof parsed.reason === "string" ? parsed.reason : undefined,
118
+ meta: { raw: text },
119
+ };
61
120
  }
62
121
  // If we can't parse JSON, return a low-confidence score
63
122
  return {