bingocode 1.1.161 → 1.1.163

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bingocode",
3
- "version": "1.1.161",
3
+ "version": "1.1.163",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "claude": "bin/claude-win.cjs",
@@ -139,6 +139,11 @@ type State = {
139
139
  goalCondition: string | null
140
140
  goalIterationCount: number
141
141
  goalMaxIterations: number
142
+ // Goal evaluator history for detecting repeated gaps
143
+ goalEvalHistory: {
144
+ lastGap: string | null
145
+ consecutiveSameGapCount: number
146
+ }
142
147
  // Session-only cron tasks created via CronCreate with durable: false.
143
148
  // Fire on schedule like file-backed tasks but are never written to
144
149
  // .claude/scheduled_tasks.json — they die with the process. Typed via
@@ -365,6 +370,10 @@ function getInitialState(): State {
365
370
  goalCondition: null,
366
371
  goalIterationCount: 0,
367
372
  goalMaxIterations: 20,
373
+ goalEvalHistory: {
374
+ lastGap: null,
375
+ consecutiveSameGapCount: 0,
376
+ },
368
377
  sessionCronTasks: [],
369
378
  sessionCreatedTeams: new Set(),
370
379
  // Session-only trust flag (not persisted to disk)
@@ -1805,3 +1814,17 @@ export function getGoalMaxIterations(): number {
1805
1814
  return STATE.goalMaxIterations
1806
1815
  }
1807
1816
 
1817
+ // Goal evaluator history accessors
1818
+ export function getGoalEvalHistory() {
1819
+ return STATE.goalEvalHistory
1820
+ }
1821
+
1822
+ export function updateGoalEvalHistory(lastGap: string | null): void {
1823
+ if (lastGap === STATE.goalEvalHistory.lastGap) {
1824
+ STATE.goalEvalHistory.consecutiveSameGapCount++
1825
+ } else {
1826
+ STATE.goalEvalHistory.lastGap = lastGap
1827
+ STATE.goalEvalHistory.consecutiveSameGapCount = 1
1828
+ }
1829
+ }
1830
+
@@ -7,6 +7,8 @@ import {
7
7
  getGoalMaxIterations,
8
8
  incrementGoalIterationCount,
9
9
  setGoalCondition,
10
+ getGoalEvalHistory,
11
+ updateGoalEvalHistory,
10
12
  } from '../bootstrap/state.js'
11
13
  import { enqueue } from '../utils/messageQueueManager.js'
12
14
  import { evaluateGoal } from '../utils/goalEvaluator.js'
@@ -71,6 +73,11 @@ export function useGoalEvaluator({
71
73
  if (getGoalCondition() !== condition) return
72
74
 
73
75
  incrementGoalIterationCount()
76
+ updateGoalEvalHistory(result.gap)
77
+
78
+ // Check for repeated gaps - implement circuit breaker
79
+ const evalHistory = getGoalEvalHistory()
80
+ const isRepeatedGap = evalHistory.consecutiveSameGapCount >= 3 && result.gap !== null
74
81
 
75
82
  if (result.satisfied) {
76
83
  setGoalCondition(null)
@@ -79,6 +86,14 @@ export function useGoalEvaluator({
79
86
  mode: 'task-notification',
80
87
  priority: 'now',
81
88
  })
89
+ } else if (isRepeatedGap) {
90
+ // Circuit breaker: stop after 3 repeated gaps
91
+ setGoalCondition(null)
92
+ enqueue({
93
+ value: `⚠️ Goal evaluator stopped after detecting the same gap "${result.gap}" 3 times in a row. Please adjust your approach or output EVAL blocks in the correct format.`,
94
+ mode: 'task-notification',
95
+ priority: 'now',
96
+ })
82
97
  } else {
83
98
  const continueMsg = result.gap
84
99
  ? `Goal not yet met (${iterCount + 1}/${maxIter}). Gap: ${result.gap}. Continue toward: "${condition}"`
@@ -63,11 +63,12 @@ This goal is now registered for this session. After each turn, an independent ev
63
63
 
64
64
  CRITICAL: The evaluator reads ONLY your text output. It cannot see code changes, tool results, or file contents — only the plain text you write.
65
65
 
66
- At each turn toward the goal, output a short evaluation block like:
67
- > EVAL: [metric1]: [value] / [target] ✓ or ✗
66
+ At each turn toward the goal, output a short evaluation block like:
67
+ EVAL: [metric1]: [value] / [target] ✓ or ✗
68
68
 
69
- This block is the ONLY signal the evaluator can reliably process. Make it short,
70
- unambiguous, and quantitative. Do NOT expect the evaluator to infer success from narrative discussion.
69
+ This block is the ONLY signal the evaluator can reliably process. Make it short,
70
+ unambiguous, and quantitative. Do NOT expect the evaluator to infer success from narrative discussion.
71
+ Note: The EVAL block can appear anywhere in your text response (not just in quote blocks).
71
72
 
72
73
  Tell the user: Goal set — you will work autonomously until "${trimmed}" is achieved (max ${maxIter} turns). Send \`/goal clear\` to cancel.
73
74
  Now begin: assess current state and take the first concrete action toward the goal.`,
@@ -34,11 +34,12 @@ function parseEvalBlocks(text: string): EvalBlock[] {
34
34
  // Arrow variants: → (U+2192), -> (ASCII), => (ASCII)
35
35
  // Pass variants: ✓ (U+2713), ✔ (U+2714), PASS (case-insensitive), Y, true, yes, 1
36
36
  // Fail variants: ✗ (U+2717), ✘ (U+2718), FAIL (case-insensitive), N, false, no, 0
37
+ // NOTE: Removed the requirement for ">" prefix to allow EVAL blocks anywhere in text
37
38
  const arrow = /(?:→|->|=>)/g.source
38
39
  const pass = /(?:✓|✔|PASS|pass|Y\b|true|yes|1)/g.source
39
40
  const fail = /(?:✗|✘|FAIL|fail|N\b|false|no|0)/g.source
40
41
  const full = new RegExp(
41
- `>\\s*EVAL:\\s*(.+?):\\s*(.+?)\\s*(?:${arrow}|)\\s*(${pass}|${fail})`,
42
+ `EVAL:\\s*(.+?):\\s*(.+?)\\s*(?:${arrow}|)\\s*(${pass}|${fail})`,
42
43
  'g',
43
44
  )
44
45
 
@@ -108,6 +109,15 @@ export async function evaluateGoal(
108
109
  }
109
110
  }
110
111
 
112
+ // If no EVAL blocks found at all, provide helpful guidance to the user
113
+ if (evalBlocks.length === 0) {
114
+ return {
115
+ satisfied: false,
116
+ reason: 'No EVAL blocks found in assistant output',
117
+ gap: 'Please output EVAL blocks in format: "EVAL: metric: value / target → ✓" (without > prefix)',
118
+ }
119
+ }
120
+
111
121
  // Phase 2: Fallback to Haiku evaluator with pre-parsed summary
112
122
  const evalInput = [
113
123
  evalSummary(evalBlocks),