bingocode 1.1.156 → 1.1.157
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/utils/goalEvaluator.ts +85 -29
package/package.json
CHANGED
|
@@ -18,18 +18,34 @@ type EvalBlock = {
|
|
|
18
18
|
}
|
|
19
19
|
|
|
20
20
|
/**
|
|
21
|
-
* Parse markdown text for structured
|
|
21
|
+
* Parse markdown text for structured > EVAL: lines.
|
|
22
22
|
*
|
|
23
|
-
*
|
|
23
|
+
* Accepted actor formats:
|
|
24
24
|
* > EVAL: <metric>: <value> / <target> → ✓ or ✗
|
|
25
|
+
* > EVAL: <metric>: <value> / <target> -> PASS
|
|
26
|
+
* > EVAL: <metric>: <value> / <target> => true
|
|
27
|
+
*
|
|
28
|
+
* Supports ASCII and Unicode arrow/check/cross variants for maximum compatibility.
|
|
25
29
|
*/
|
|
26
30
|
function parseEvalBlocks(text: string): EvalBlock[] {
|
|
27
31
|
const blocks: EvalBlock[] = []
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
32
|
+
|
|
33
|
+
// Build one combined pattern that captures the metric, the "<value> / <target>" text (valueTarget), and the pass/fail signal.
|
|
34
|
+
// Arrow variants: → (U+2192), -> (ASCII), => (ASCII)
|
|
35
|
+
// Pass variants: ✓ (U+2713), ✔ (U+2714), PASS or pass (these two casings only — the pattern has no "i" flag), Y, true, yes, 1
|
|
36
|
+
// Fail variants: ✗ (U+2717), ✘ (U+2718), FAIL or fail (these two casings only — the pattern has no "i" flag), N, false, no, 0
|
|
37
|
+
const arrow = /(?:→|->|=>)/g.source
|
|
38
|
+
const pass = /(?:✓|✔|PASS|pass|Y\b|true|yes|1)/g.source
|
|
39
|
+
const fail = /(?:✗|✘|FAIL|fail|N\b|false|no|0)/g.source
|
|
40
|
+
const full = new RegExp(
|
|
41
|
+
`>\\s*EVAL:\\s*(.+?):\\s*(.+?)\\s*(?:${arrow}|)\\s*(${pass}|${fail})`,
|
|
42
|
+
'g',
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
let match: RegExpExecArray | null
|
|
46
|
+
while ((match = full.exec(text)) !== null) {
|
|
47
|
+
const [, metric, valueTarget, signal] = match
|
|
48
|
+
const passed = /^(✓|✔|PASS|pass|Y\b|true|yes|1)$/.test(signal.trim())
|
|
33
49
|
blocks.push({ metric: metric.trim(), valueTarget: valueTarget.trim(), passed })
|
|
34
50
|
}
|
|
35
51
|
return blocks
|
|
@@ -105,12 +121,14 @@ export async function evaluateGoal(
|
|
|
105
121
|
apiKey: process.env.ANTHROPIC_API_KEY ?? 'dummy',
|
|
106
122
|
})
|
|
107
123
|
|
|
108
|
-
const prompt = `
|
|
124
|
+
const prompt = `Goal condition to evaluate: "${goalCondition}"
|
|
109
125
|
|
|
110
|
-
|
|
126
|
+
The assistant's recent output is below. Based ONLY on it, determine if the goal is satisfied.
|
|
111
127
|
|
|
112
|
-
|
|
113
|
-
{"satisfied": true|false, "reason": "<one sentence>", "gap": "<
|
|
128
|
+
RESPOND WITH ONLY VALID JSON — no markdown, no explanation:
|
|
129
|
+
{"satisfied": true|false, "reason": "<one sentence why>", "gap": "<what's still missing, or null if satisfied>"}
|
|
130
|
+
|
|
131
|
+
${evalInput.slice(0, 5000)}`
|
|
114
132
|
|
|
115
133
|
let text = ''
|
|
116
134
|
try {
|
|
@@ -129,24 +147,62 @@ Evaluate and respond ONLY in valid JSON:
|
|
|
129
147
|
}
|
|
130
148
|
}
|
|
131
149
|
|
|
132
|
-
// Phase 3: Parse evaluator output back to JSON
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
+
// Phase 3: Parse evaluator output back to JSON.
|
|
151
|
+
// Try JSON first (after stripping code fences and trimming to the outermost braces), then fall back to text heuristics.
|
|
152
|
+
const parseError = (detail: string): GoalEvalResult => ({
|
|
153
|
+
satisfied: false,
|
|
154
|
+
reason: 'Evaluator parse error',
|
|
155
|
+
gap: `Failed to parse evaluator output. Detail: ${detail}. First 120 chars of raw response: ${text.slice(0, 120)}`,
|
|
156
|
+
})
|
|
157
|
+
|
|
158
|
+
const tryJsonParse = (raw: string): { ok: true; value: GoalEvalResult } | { ok: false } => {
|
|
159
|
+
try {
|
|
160
|
+
let cleaned = raw
|
|
161
|
+
.replace(/```(?:json)?\s*/gi, '')
|
|
162
|
+
.replace(/```/g, '')
|
|
163
|
+
.trim()
|
|
164
|
+
const start = cleaned.indexOf('{')
|
|
165
|
+
const end = cleaned.lastIndexOf('}')
|
|
166
|
+
if (start === -1 || end === -1 || end <= start) return { ok: false }
|
|
167
|
+
cleaned = cleaned.slice(start, end + 1)
|
|
168
|
+
const parsed = JSON.parse(cleaned)
|
|
169
|
+
if (typeof parsed.satisfied === 'boolean') {
|
|
170
|
+
return {
|
|
171
|
+
ok: true,
|
|
172
|
+
value: {
|
|
173
|
+
satisfied: parsed.satisfied,
|
|
174
|
+
reason: parsed.reason || '',
|
|
175
|
+
gap: parsed.gap || null,
|
|
176
|
+
},
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
return { ok: false }
|
|
180
|
+
} catch {
|
|
181
|
+
return { ok: false }
|
|
150
182
|
}
|
|
151
183
|
}
|
|
184
|
+
|
|
185
|
+
// Attempt 1 — JSON parse of the response after removing code fences and slicing to the outermost { ... }
|
|
186
|
+
const result = tryJsonParse(text)
|
|
187
|
+
if (result.ok) return result.value
|
|
188
|
+
|
|
189
|
+
// Attempt 2 — heuristic extraction from text response
|
|
190
|
+
const lower = text.toLowerCase()
|
|
191
|
+
const looksSatisfied =
|
|
192
|
+
lower.includes('"satisfied": true') ||
|
|
193
|
+
(lower.includes('satisfied') && lower.includes('true')) ||
|
|
194
|
+
/goal\s+is\s+(?:met|satisfied|achieved)/.test(lower) ||
|
|
195
|
+
/condition\s+is\s+(?:met|satisfied|fulfilled)/.test(lower)
|
|
196
|
+
|
|
197
|
+
const extractString = (field: string): string => {
|
|
198
|
+
const regex = new RegExp(`"${field}"\\s*:\\s*"([^"]*)"`, 'i')
|
|
199
|
+
const match = text.match(regex)
|
|
200
|
+
return match ? match[1] : 'unknown'
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
return {
|
|
204
|
+
satisfied: looksSatisfied,
|
|
205
|
+
reason: extractString('reason') || (looksSatisfied ? 'condition matched' : 'condition not met'),
|
|
206
|
+
gap: extractString('gap') || null,
|
|
207
|
+
}
|
|
152
208
|
}
|