@miller-tech/uap 1.20.11 → 1.20.13

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.20.11",
3
+ "version": "1.20.13",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -143,7 +143,7 @@ PROXY_TOOL_STATE_MIN_MESSAGES = int(
143
143
  os.environ.get("PROXY_TOOL_STATE_MIN_MESSAGES", "6")
144
144
  )
145
145
  PROXY_TOOL_STATE_FORCED_BUDGET = int(
146
- os.environ.get("PROXY_TOOL_STATE_FORCED_BUDGET", "24")
146
+ os.environ.get("PROXY_TOOL_STATE_FORCED_BUDGET", "12")
147
147
  )
148
148
  PROXY_TOOL_STATE_AUTO_BUDGET = int(os.environ.get("PROXY_TOOL_STATE_AUTO_BUDGET", "2"))
149
149
  PROXY_TOOL_STATE_STAGNATION_THRESHOLD = int(
@@ -156,7 +156,7 @@ PROXY_TOOL_STATE_FINALIZE_THRESHOLD = int(
156
156
  os.environ.get("PROXY_TOOL_STATE_FINALIZE_THRESHOLD", "18")
157
157
  )
158
158
  PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT = int(
159
- os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "2")
159
+ os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "1")
160
160
  )
161
161
  PROXY_CLIENT_RATE_WINDOW_SECS = int(
162
162
  os.environ.get("PROXY_CLIENT_RATE_WINDOW_SECS", "60")
@@ -628,6 +628,7 @@ class SessionMonitor:
628
628
  tool_state_transitions: int = 0
629
629
  tool_state_review_cycles: int = 0
630
630
  last_tool_fingerprint: str = ""
631
+ cycling_tool_names: list = field(default_factory=list)
631
632
  finalize_turn_active: bool = False
632
633
  completion_required: bool = False
633
634
  completion_pending: bool = False
@@ -832,6 +833,7 @@ class SessionMonitor:
832
833
  self.tool_state_auto_budget_remaining = 0
833
834
  self.tool_state_stagnation_streak = 0
834
835
  self.tool_state_review_cycles = 0
836
+ self.cycling_tool_names = []
835
837
  self.last_tool_fingerprint = ""
836
838
 
837
839
  def update_completion_state(self, anthropic_body: dict, has_tool_results: bool):
@@ -1104,7 +1106,10 @@ def estimate_total_tokens(anthropic_body: dict) -> int:
1104
1106
 
1105
1107
 
1106
1108
  def prune_conversation(
1107
- anthropic_body: dict, context_window: int, target_fraction: float = 0.65
1109
+ anthropic_body: dict,
1110
+ context_window: int,
1111
+ target_fraction: float = 0.65,
1112
+ keep_last: int = 8,
1108
1113
  ) -> dict:
1109
1114
  """Prune the conversation to fit within the context window.
1110
1115
 
@@ -1119,6 +1124,7 @@ def prune_conversation(
1119
1124
  anthropic_body: The full Anthropic request body
1120
1125
  context_window: Maximum context window in tokens
1121
1126
  target_fraction: Target utilization after pruning (0.0-1.0)
1127
+ keep_last: Number of recent messages to always keep (default 8)
1122
1128
 
1123
1129
  Returns:
1124
1130
  Modified anthropic_body with pruned messages
@@ -1131,6 +1137,8 @@ def prune_conversation(
1131
1137
  target_tokens = int(context_window * target_fraction)
1132
1138
 
1133
1139
  # Estimate non-message tokens (system, tools, agentic supplement)
1140
+ # Apply a 1.5x safety factor to account for chat template overhead
1141
+ # and tokenization differences between local estimate and upstream
1134
1142
  overhead_tokens = 0
1135
1143
  system = anthropic_body.get("system", "")
1136
1144
  if isinstance(system, str):
@@ -1144,6 +1152,7 @@ def prune_conversation(
1144
1152
  tools = anthropic_body.get("tools", [])
1145
1153
  if tools:
1146
1154
  overhead_tokens += estimate_tokens(json.dumps(tools))
1155
+ overhead_tokens = int(overhead_tokens * 1.5) # Safety factor for template overhead
1147
1156
 
1148
1157
  # Budget for messages
1149
1158
  message_budget = target_tokens - overhead_tokens
@@ -1152,7 +1161,7 @@ def prune_conversation(
1152
1161
  return anthropic_body
1153
1162
 
1154
1163
  # Always keep the first user message and the last N messages
1155
- KEEP_LAST = 8 # Keep the last 8 messages (recent context)
1164
+ KEEP_LAST = keep_last
1156
1165
  protected_head = messages[:1] # First user message
1157
1166
  protected_tail = (
1158
1167
  messages[-KEEP_LAST:] if len(messages) > KEEP_LAST else messages[1:]
@@ -2046,12 +2055,17 @@ def _resolve_state_machine_tool_choice(
2046
2055
  monitor.tool_state_forced_budget_remaining = max(
2047
2056
  1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
2048
2057
  )
2058
+ # Capture which tools are cycling for narrowing/hint injection
2059
+ window = max(2, PROXY_TOOL_STATE_CYCLE_WINDOW)
2060
+ recent = [fp for fp in monitor.tool_call_history[-window:] if fp]
2061
+ monitor.cycling_tool_names = list(dict.fromkeys(recent))
2049
2062
  logger.warning(
2050
- "TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d)",
2063
+ "TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d cycling_tools=%s)",
2051
2064
  cycle_looping,
2052
2065
  cycle_repeat,
2053
2066
  monitor.tool_state_stagnation_streak,
2054
2067
  monitor.tool_state_review_cycles,
2068
+ monitor.cycling_tool_names,
2055
2069
  )
2056
2070
  return "required", reason
2057
2071
 
@@ -2342,6 +2356,49 @@ def build_openai_request(
2342
2356
  monitor.no_progress_streak = (
2343
2357
  0 if last_user_has_tool_result else monitor.no_progress_streak + 1
2344
2358
  )
2359
+ # Option 1: Inject cycle-break instruction when entering review
2360
+ if (
2361
+ monitor.tool_turn_phase == "review"
2362
+ and state_reason in {"cycle_detected", "stagnation"}
2363
+ and monitor.cycling_tool_names
2364
+ ):
2365
+ cycling_names = ", ".join(monitor.cycling_tool_names)
2366
+ cycle_hint = (
2367
+ f"You have been repeatedly calling the same tool(s): {cycling_names}. "
2368
+ "This is not making progress. Use a DIFFERENT tool to advance the task, "
2369
+ "or call a tool that produces your final answer."
2370
+ )
2371
+ messages = openai_body.get("messages", [])
2372
+ messages.append({"role": "user", "content": cycle_hint})
2373
+ openai_body["messages"] = messages
2374
+ logger.warning(
2375
+ "CYCLE BREAK: injected hint about cycling tools: %s",
2376
+ cycling_names,
2377
+ )
2378
+ # Option 2: Narrow tools during review to exclude cycling tools
2379
+ if (
2380
+ monitor.tool_turn_phase == "review"
2381
+ and monitor.cycling_tool_names
2382
+ and "tools" in openai_body
2383
+ ):
2384
+ original_count = len(openai_body["tools"])
2385
+ narrowed = [
2386
+ t
2387
+ for t in openai_body["tools"]
2388
+ if t.get("function", {}).get("name") not in monitor.cycling_tool_names
2389
+ ]
2390
+ if narrowed:
2391
+ openai_body["tools"] = narrowed
2392
+ logger.warning(
2393
+ "CYCLE BREAK: narrowed tools from %d to %d (excluded %s)",
2394
+ original_count,
2395
+ len(narrowed),
2396
+ monitor.cycling_tool_names,
2397
+ )
2398
+ else:
2399
+ logger.warning(
2400
+ "CYCLE BREAK: cannot narrow tools — all tools are cycling, keeping original set",
2401
+ )
2345
2402
  logger.info(
2346
2403
  "tool_choice forced to 'required' by TOOL STATE MACHINE (phase=%s reason=%s forced_budget=%d)",
2347
2404
  monitor.tool_turn_phase,
@@ -4962,28 +5019,86 @@ async def messages(request: Request):
4962
5019
  monitor.log_status()
4963
5020
 
4964
5021
  # --- Option C: Prune conversation if approaching context limit ---
5022
+ # Option 1: Prefer upstream actual token count over local estimate
4965
5023
  ctx_window = monitor.context_window
4966
5024
  if ctx_window > 0:
4967
- utilization = estimated_tokens / ctx_window
5025
+ # Use the upstream's actual prompt_tokens if available and higher
5026
+ # than the local estimate (the upstream counts chat template overhead,
5027
+ # tool schema tokenization, etc. that local heuristics miss).
5028
+ effective_tokens = estimated_tokens
5029
+ if monitor.last_input_tokens > estimated_tokens:
5030
+ effective_tokens = monitor.last_input_tokens
5031
+ logger.info(
5032
+ "Using upstream token count %d (local estimate %d) for prune decision",
5033
+ effective_tokens,
5034
+ estimated_tokens,
5035
+ )
5036
+ utilization = effective_tokens / ctx_window
4968
5037
  if utilization >= PROXY_CONTEXT_PRUNE_THRESHOLD:
4969
5038
  logger.warning(
4970
5039
  "Context utilization %.1f%% exceeds threshold %.1f%% -- pruning conversation",
4971
5040
  utilization * 100,
4972
5041
  PROXY_CONTEXT_PRUNE_THRESHOLD * 100,
4973
5042
  )
5043
+ # Option 3: Aggressive pruning at critical utilization
5044
+ target_frac = _resolve_prune_target_fraction()
5045
+ keep_last = 8
5046
+ if utilization >= 0.90:
5047
+ keep_last = 4
5048
+ target_frac = min(target_frac, 0.40)
5049
+ logger.warning(
5050
+ "CRITICAL PRUNE: utilization %.1f%% >= 90%%, using keep_last=%d target=%.0f%%",
5051
+ utilization * 100,
5052
+ keep_last,
5053
+ target_frac * 100,
5054
+ )
4974
5055
  body = prune_conversation(
4975
- body, ctx_window, target_fraction=_resolve_prune_target_fraction()
5056
+ body, ctx_window, target_fraction=target_frac, keep_last=keep_last
4976
5057
  )
4977
5058
  monitor.prune_count += 1
4978
- # Re-estimate after pruning
5059
+ # Option 4: Post-prune validation — verify actual reduction
4979
5060
  estimated_tokens = estimate_total_tokens(body)
4980
5061
  monitor.record_request(estimated_tokens)
5062
+ post_util = estimated_tokens / ctx_window
4981
5063
  n_messages = len(body.get("messages", []))
4982
5064
  logger.info(
4983
- "After pruning: ~%d tokens, %d messages",
5065
+ "After pruning: ~%d tokens (%d messages), utilization %.1f%%",
4984
5066
  estimated_tokens,
4985
5067
  n_messages,
5068
+ post_util * 100,
4986
5069
  )
5070
+ # If still above threshold after first prune, do aggressive second pass
5071
+ if post_util >= PROXY_CONTEXT_PRUNE_THRESHOLD:
5072
+ logger.warning(
5073
+ "POST-PRUNE VALIDATION: still at %.1f%% after prune, doing aggressive pass",
5074
+ post_util * 100,
5075
+ )
5076
+ body = prune_conversation(
5077
+ body, ctx_window, target_fraction=0.35, keep_last=4
5078
+ )
5079
+ monitor.prune_count += 1
5080
+ estimated_tokens = estimate_total_tokens(body)
5081
+ monitor.record_request(estimated_tokens)
5082
+ post_util = estimated_tokens / ctx_window
5083
+ n_messages = len(body.get("messages", []))
5084
+ logger.info(
5085
+ "After aggressive prune: ~%d tokens (%d messages), utilization %.1f%%",
5086
+ estimated_tokens,
5087
+ n_messages,
5088
+ post_util * 100,
5089
+ )
5090
+ # Option 2: Circuit breaker — if 3+ consecutive prunes and still above,
5091
+ # force finalize (drop tools, let model wrap up)
5092
+ if monitor.prune_count >= 3 and post_util >= PROXY_CONTEXT_PRUNE_THRESHOLD:
5093
+ logger.error(
5094
+ "PRUNE CIRCUIT BREAKER: %d consecutive prunes, still at %.1f%%. "
5095
+ "Forcing finalize to prevent death spiral.",
5096
+ monitor.prune_count,
5097
+ post_util * 100,
5098
+ )
5099
+ monitor.set_tool_turn_phase("finalize", reason="prune_circuit_breaker")
5100
+ monitor.tool_state_auto_budget_remaining = 1
5101
+ monitor.reset_completion_recovery()
4987
5102
 
4988
5103
  openai_body = build_openai_request(
4989
5104
  body,
@@ -5104,6 +5219,10 @@ async def messages(request: Request):
5104
5219
 
5105
5220
  anthropic_resp = openai_to_anthropic_response(openai_resp, model)
5106
5221
  monitor.record_response(anthropic_resp.get("usage", {}).get("output_tokens", 0))
5222
+ # Update last_input_tokens from upstream's actual prompt_tokens
5223
+ upstream_input = anthropic_resp.get("usage", {}).get("input_tokens", 0)
5224
+ if upstream_input > 0:
5225
+ monitor.last_input_tokens = upstream_input
5107
5226
  if PROXY_FORCE_NON_STREAM:
5108
5227
  logger.info(
5109
5228
  "FORCED NON-STREAM: served stream response via guarded non-stream path"
@@ -5441,6 +5560,10 @@ async def messages(request: Request):
5441
5560
  # Track output tokens in session monitor
5442
5561
  output_tokens = anthropic_resp.get("usage", {}).get("output_tokens", 0)
5443
5562
  monitor.record_response(output_tokens)
5563
+ # Update last_input_tokens from upstream's actual prompt_tokens
5564
+ upstream_input = anthropic_resp.get("usage", {}).get("input_tokens", 0)
5565
+ if upstream_input > 0:
5566
+ monitor.last_input_tokens = upstream_input
5444
5567
 
5445
5568
  return anthropic_resp
5446
5569
 
@@ -3169,6 +3169,214 @@ class TestToolStarvationBreaker(unittest.TestCase):
3169
3169
  self.assertIn("tools", result)
3170
3170
 
3171
3171
 
3172
+ class TestPruningImprovements(unittest.TestCase):
3173
+ """Tests for pruning death spiral fixes."""
3174
+
3175
+ def test_prune_uses_upstream_tokens_when_higher(self):
3176
+ """Option 1: upstream last_input_tokens used when higher than local estimate."""
3177
+ monitor = proxy.SessionMonitor(context_window=10000)
3178
+ # Simulate upstream reporting higher token count than local estimate
3179
+ monitor.last_input_tokens = 9000 # 90% - above 85% threshold
3180
+ body = {
3181
+ "model": "test",
3182
+ "messages": [
3183
+ {"role": "user", "content": "start"},
3184
+ {"role": "assistant", "content": "ok"},
3185
+ {"role": "user", "content": "a" * 100},
3186
+ {"role": "assistant", "content": "b" * 100},
3187
+ {"role": "user", "content": "c" * 100},
3188
+ {"role": "assistant", "content": "d" * 100},
3189
+ {"role": "user", "content": "e" * 100},
3190
+ {"role": "assistant", "content": "f" * 100},
3191
+ {"role": "user", "content": "g" * 100},
3192
+ {"role": "assistant", "content": "h" * 100},
3193
+ {"role": "user", "content": "continue"},
3194
+ ],
3195
+ }
3196
+ # Local estimate_total_tokens will be much lower than 9000
3197
+ local_est = proxy.estimate_total_tokens(body)
3198
+ self.assertLess(local_est, 9000)
3199
+ # The pruning code should use upstream's 9000 for the decision
3200
+
3201
+ def test_prune_conversation_accepts_keep_last(self):
3202
+ """Option 3: prune_conversation accepts keep_last parameter."""
3203
+ body = {
3204
+ "messages": [
3205
+ {"role": "user", "content": "first"},
3206
+ {"role": "assistant", "content": "a" * 500},
3207
+ {"role": "user", "content": "b" * 500},
3208
+ {"role": "assistant", "content": "c" * 500},
3209
+ {"role": "user", "content": "d" * 500},
3210
+ {"role": "assistant", "content": "e" * 500},
3211
+ {"role": "user", "content": "f" * 500},
3212
+ {"role": "assistant", "content": "g" * 500},
3213
+ {"role": "user", "content": "h" * 500},
3214
+ {"role": "assistant", "content": "i" * 500},
3215
+ {"role": "user", "content": "last"},
3216
+ ],
3217
+ }
3218
+ # With keep_last=4, more middle messages should be prunable
3219
+ result_8 = proxy.prune_conversation(dict(body), 2000, target_fraction=0.50, keep_last=8)
3220
+ result_4 = proxy.prune_conversation(dict(body), 2000, target_fraction=0.50, keep_last=4)
3221
+ # keep_last=4 should result in fewer or equal messages
3222
+ self.assertLessEqual(
3223
+ len(result_4.get("messages", [])),
3224
+ len(result_8.get("messages", [])),
3225
+ )
3226
+
3227
+ def test_prune_circuit_breaker_sets_finalize(self):
3228
+ """Option 2: circuit breaker forces finalize after repeated prunes."""
3229
+ monitor = proxy.SessionMonitor(context_window=10000)
3230
+ monitor.prune_count = 3 # Already pruned 3 times
3231
+ # After the pruning code runs and still exceeds threshold,
3232
+ # it should set finalize phase
3233
+ monitor.set_tool_turn_phase("act", reason="test")
3234
+ # Simulate the circuit breaker logic
3235
+ monitor.set_tool_turn_phase("finalize", reason="prune_circuit_breaker")
3236
+ self.assertEqual(monitor.tool_turn_phase, "finalize")
3237
+
3238
+
3239
+ class TestCycleBreakOptions(unittest.TestCase):
3240
+ """Tests for cycle-break options: hint injection, tool narrowing, reduced budgets."""
3241
+
3242
+ def test_cycle_break_injects_hint_message(self):
3243
+ """Option 1: cycle detection injects a user hint about the cycling tools."""
3244
+ old_state = getattr(proxy, "PROXY_TOOL_STATE_MACHINE")
3245
+ old_min_msgs = getattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES")
3246
+ old_forced = getattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET")
3247
+ old_auto = getattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET")
3248
+ old_stagnation = getattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD")
3249
+ old_cycle_window = getattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW")
3250
+ try:
3251
+ setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
3252
+ setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
3253
+ setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 20)
3254
+ setattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET", 2)
3255
+ setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 99)
3256
+ setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 4)
3257
+
3258
+ monitor = proxy.SessionMonitor(context_window=262144)
3259
+ monitor.tool_turn_phase = "act"
3260
+ monitor.tool_state_forced_budget_remaining = 20
3261
+ monitor.tool_call_history = ["Bash", "Bash", "Bash", "Bash"]
3262
+ monitor.last_tool_fingerprint = "Bash"
3263
+
3264
+ body = {
3265
+ "model": "test",
3266
+ "messages": [
3267
+ {"role": "user", "content": "start"},
3268
+ {
3269
+ "role": "assistant",
3270
+ "content": [
3271
+ {"type": "tool_use", "id": "t1", "name": "Bash", "input": {"command": "ls"}},
3272
+ ],
3273
+ },
3274
+ {
3275
+ "role": "user",
3276
+ "content": [
3277
+ {"type": "tool_result", "tool_use_id": "t1", "content": "ok"},
3278
+ ],
3279
+ },
3280
+ ],
3281
+ "tools": [
3282
+ {"name": "Bash", "description": "Run command", "input_schema": {"type": "object"}},
3283
+ {"name": "Read", "description": "Read file", "input_schema": {"type": "object"}},
3284
+ ],
3285
+ }
3286
+
3287
+ openai = proxy.build_openai_request(body, monitor)
3288
+ self.assertEqual(monitor.tool_turn_phase, "review")
3289
+ # Check that a cycle-break hint was injected
3290
+ messages = openai.get("messages", [])
3291
+ last_msg = messages[-1] if messages else {}
3292
+ self.assertEqual(last_msg.get("role"), "user")
3293
+ self.assertIn("Bash", last_msg.get("content", ""))
3294
+ self.assertIn("DIFFERENT tool", last_msg.get("content", ""))
3295
+ finally:
3296
+ setattr(proxy, "PROXY_TOOL_STATE_MACHINE", old_state)
3297
+ setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", old_min_msgs)
3298
+ setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", old_forced)
3299
+ setattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET", old_auto)
3300
+ setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", old_stagnation)
3301
+ setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", old_cycle_window)
3302
+
3303
+ def test_cycle_break_narrows_tools(self):
3304
+ """Option 2: cycling tools are excluded from the tools array during review."""
3305
+ old_state = getattr(proxy, "PROXY_TOOL_STATE_MACHINE")
3306
+ old_min_msgs = getattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES")
3307
+ old_forced = getattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET")
3308
+ old_auto = getattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET")
3309
+ old_stagnation = getattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD")
3310
+ old_cycle_window = getattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW")
3311
+ try:
3312
+ setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
3313
+ setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
3314
+ setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 20)
3315
+ setattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET", 2)
3316
+ setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 99)
3317
+ setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 4)
3318
+
3319
+ monitor = proxy.SessionMonitor(context_window=262144)
3320
+ monitor.tool_turn_phase = "act"
3321
+ monitor.tool_state_forced_budget_remaining = 20
3322
+ monitor.tool_call_history = ["Bash", "Bash", "Bash", "Bash"]
3323
+ monitor.last_tool_fingerprint = "Bash"
3324
+
3325
+ body = {
3326
+ "model": "test",
3327
+ "messages": [
3328
+ {"role": "user", "content": "start"},
3329
+ {
3330
+ "role": "assistant",
3331
+ "content": [
3332
+ {"type": "tool_use", "id": "t1", "name": "Bash", "input": {"command": "ls"}},
3333
+ ],
3334
+ },
3335
+ {
3336
+ "role": "user",
3337
+ "content": [
3338
+ {"type": "tool_result", "tool_use_id": "t1", "content": "ok"},
3339
+ ],
3340
+ },
3341
+ ],
3342
+ "tools": [
3343
+ {"name": "Bash", "description": "Run command", "input_schema": {"type": "object"}},
3344
+ {"name": "Read", "description": "Read file", "input_schema": {"type": "object"}},
3345
+ {"name": "Write", "description": "Write file", "input_schema": {"type": "object"}},
3346
+ ],
3347
+ }
3348
+
3349
+ openai = proxy.build_openai_request(body, monitor)
3350
+ self.assertEqual(monitor.tool_turn_phase, "review")
3351
+ # Bash should be excluded, Read and Write should remain
3352
+ tool_names = [t["function"]["name"] for t in openai.get("tools", [])]
3353
+ self.assertNotIn("Bash", tool_names)
3354
+ self.assertIn("Read", tool_names)
3355
+ self.assertIn("Write", tool_names)
3356
+ finally:
3357
+ setattr(proxy, "PROXY_TOOL_STATE_MACHINE", old_state)
3358
+ setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", old_min_msgs)
3359
+ setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", old_forced)
3360
+ setattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET", old_auto)
3361
+ setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", old_stagnation)
3362
+ setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", old_cycle_window)
3363
+
3364
+ def test_forced_budget_default_is_12(self):
3365
+ """Option 3: default forced budget reduced from 24 to 12."""
3366
+ self.assertEqual(proxy.PROXY_TOOL_STATE_FORCED_BUDGET, 12)
3367
+
3368
+ def test_review_cycle_limit_default_is_1(self):
3369
+ """Option 4: default review cycle limit reduced from 2 to 1."""
3370
+ self.assertEqual(proxy.PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT, 1)
3371
+
3372
+ def test_cycling_tool_names_cleared_on_reset(self):
3373
+ """cycling_tool_names is cleared when tool turn state resets."""
3374
+ monitor = proxy.SessionMonitor(context_window=262144)
3375
+ monitor.cycling_tool_names = ["Bash", "Read"]
3376
+ monitor.reset_tool_turn_state(reason="test")
3377
+ self.assertEqual(monitor.cycling_tool_names, [])
3378
+
3379
+
3172
3380
  if __name__ == "__main__":
3173
3381
  unittest.main()
3174
3382