@miller-tech/uap 1.20.25 → 1.20.26
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -2281,11 +2281,13 @@ def _resolve_state_machine_tool_choice(
|
|
|
2281
2281
|
1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
|
|
2282
2282
|
)
|
|
2283
2283
|
# If stagnation cleared during review, the model tried a
|
|
2284
|
-
# different approach — reward by reducing cycle pressure
|
|
2284
|
+
# different approach — reward by reducing cycle pressure and
|
|
2285
|
+
# lifting persistent tool exclusion.
|
|
2285
2286
|
if monitor.tool_state_stagnation_streak == 0 and monitor.tool_state_review_cycles > 0:
|
|
2286
2287
|
monitor.tool_state_review_cycles = max(0, monitor.tool_state_review_cycles - 1)
|
|
2288
|
+
monitor.cycling_tool_names = []
|
|
2287
2289
|
logger.info(
|
|
2288
|
-
"TOOL STATE MACHINE: review_cycles decremented to %d (stagnation cleared)",
|
|
2290
|
+
"TOOL STATE MACHINE: review_cycles decremented to %d, cycling exclusion lifted (stagnation cleared)",
|
|
2289
2291
|
monitor.tool_state_review_cycles,
|
|
2290
2292
|
)
|
|
2291
2293
|
return "required", "review_complete"
|
|
@@ -2589,31 +2591,41 @@ def build_openai_request(
|
|
|
2589
2591
|
monitor.no_progress_streak = (
|
|
2590
2592
|
0 if last_user_has_tool_result else monitor.no_progress_streak + 1
|
|
2591
2593
|
)
|
|
2592
|
-
#
|
|
2594
|
+
# Inject cycle-break instruction when entering review
|
|
2595
|
+
# Option 3 (Cycle 14): Escalate hint text based on review cycle count
|
|
2593
2596
|
if (
|
|
2594
2597
|
monitor.tool_turn_phase == "review"
|
|
2595
2598
|
and state_reason in {"cycle_detected", "stagnation"}
|
|
2596
2599
|
and monitor.cycling_tool_names
|
|
2597
2600
|
):
|
|
2598
2601
|
cycling_names = ", ".join(monitor.cycling_tool_names)
|
|
2599
|
-
|
|
2600
|
-
|
|
2601
|
-
|
|
2602
|
-
|
|
2603
|
-
|
|
2602
|
+
cycles = monitor.tool_state_review_cycles
|
|
2603
|
+
if cycles <= 1:
|
|
2604
|
+
cycle_hint = (
|
|
2605
|
+
f"You have been repeatedly calling the same tool(s): {cycling_names}. "
|
|
2606
|
+
"This is not making progress. Use a DIFFERENT tool to advance the task, "
|
|
2607
|
+
"or call a tool that produces your final answer."
|
|
2608
|
+
)
|
|
2609
|
+
else:
|
|
2610
|
+
cycle_hint = (
|
|
2611
|
+
f"CRITICAL: You have cycled {cycling_names} for {cycles} review rounds without progress. "
|
|
2612
|
+
"State what you have accomplished so far and what the next DIFFERENT action should be. "
|
|
2613
|
+
"Do NOT call the same tool again. Choose a completely different approach or "
|
|
2614
|
+
"produce your final answer now."
|
|
2615
|
+
)
|
|
2604
2616
|
messages = openai_body.get("messages", [])
|
|
2605
2617
|
messages.append({"role": "user", "content": cycle_hint})
|
|
2606
2618
|
openai_body["messages"] = messages
|
|
2607
2619
|
logger.warning(
|
|
2608
|
-
"CYCLE BREAK: injected hint about cycling tools: %s",
|
|
2620
|
+
"CYCLE BREAK: injected hint about cycling tools: %s (escalation=%d)",
|
|
2609
2621
|
cycling_names,
|
|
2622
|
+
cycles,
|
|
2610
2623
|
)
|
|
2611
|
-
#
|
|
2612
|
-
# Option 1
|
|
2613
|
-
#
|
|
2624
|
+
# Narrow tools to exclude cycling tools
|
|
2625
|
+
# Option 1 (Cycle 13): if any cycling tool is read-only, exclude entire class
|
|
2626
|
+
# Option 1 (Cycle 14): persist exclusion during act phase too, not just review
|
|
2614
2627
|
if (
|
|
2615
|
-
monitor.
|
|
2616
|
-
and monitor.cycling_tool_names
|
|
2628
|
+
monitor.cycling_tool_names
|
|
2617
2629
|
and "tools" in openai_body
|
|
2618
2630
|
):
|
|
2619
2631
|
exclude_set = set(monitor.cycling_tool_names)
|
|
@@ -4406,3 +4406,172 @@ class TestReadOnlyCycleClassExclusion(unittest.TestCase):
|
|
|
4406
4406
|
|
|
4407
4407
|
dup, _ = monitor.has_duplicate_read_target(threshold=3)
|
|
4408
4408
|
self.assertFalse(dup)
|
|
4409
|
+
|
|
4410
|
+
|
|
4411
|
+
class TestPersistentCycleExclusion(unittest.TestCase):
|
|
4412
|
+
"""Tests for Cycle 14: persistent exclusion, escalating hints, and
|
|
4413
|
+
exclusion across review→act transitions."""
|
|
4414
|
+
|
|
4415
|
+
def _make_body_with_tools(self, tool_names, active_tool="bash", active_input=None):
|
|
4416
|
+
tools = [
|
|
4417
|
+
{"name": n, "description": f"{n} tool", "input_schema": {"type": "object"}}
|
|
4418
|
+
for n in tool_names
|
|
4419
|
+
]
|
|
4420
|
+
inp = active_input or {"command": "ls"}
|
|
4421
|
+
return {
|
|
4422
|
+
"model": "test",
|
|
4423
|
+
"messages": [
|
|
4424
|
+
{"role": "user", "content": "do something"},
|
|
4425
|
+
{
|
|
4426
|
+
"role": "assistant",
|
|
4427
|
+
"content": [
|
|
4428
|
+
{"type": "tool_use", "id": "t1", "name": active_tool, "input": inp}
|
|
4429
|
+
],
|
|
4430
|
+
},
|
|
4431
|
+
{
|
|
4432
|
+
"role": "user",
|
|
4433
|
+
"content": [
|
|
4434
|
+
{"type": "tool_result", "tool_use_id": "t1", "content": "ok"}
|
|
4435
|
+
],
|
|
4436
|
+
},
|
|
4437
|
+
],
|
|
4438
|
+
"tools": tools,
|
|
4439
|
+
}
|
|
4440
|
+
|
|
4441
|
+
def test_exclusion_persists_through_act_phase(self):
|
|
4442
|
+
"""Option 1: cycling_tool_names exclusion persists in act phase after review."""
|
|
4443
|
+
old_vals = {}
|
|
4444
|
+
for k in ["PROXY_TOOL_STATE_MACHINE", "PROXY_TOOL_STATE_MIN_MESSAGES",
|
|
4445
|
+
"PROXY_TOOL_STATE_FORCED_BUDGET", "PROXY_TOOL_STATE_CYCLE_WINDOW",
|
|
4446
|
+
"PROXY_TOOL_STATE_STAGNATION_THRESHOLD"]:
|
|
4447
|
+
old_vals[k] = getattr(proxy, k)
|
|
4448
|
+
try:
|
|
4449
|
+
setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
|
|
4450
|
+
setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
|
|
4451
|
+
setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 6)
|
|
4452
|
+
setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 3)
|
|
4453
|
+
setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 2)
|
|
4454
|
+
|
|
4455
|
+
all_tools = ["bash", "read", "write", "edit"]
|
|
4456
|
+
body = self._make_body_with_tools(all_tools)
|
|
4457
|
+
monitor = proxy.SessionMonitor(context_window=262144)
|
|
4458
|
+
|
|
4459
|
+
# Simulate bash cycling that triggers review
|
|
4460
|
+
monitor.cycling_tool_names = ["bash"]
|
|
4461
|
+
monitor.tool_turn_phase = "act"
|
|
4462
|
+
monitor.tool_state_forced_budget_remaining = 5
|
|
4463
|
+
|
|
4464
|
+
openai = proxy.build_openai_request(body, monitor)
|
|
4465
|
+
|
|
4466
|
+
# In act phase with cycling_tool_names set, bash should be excluded
|
|
4467
|
+
remaining = [t["function"]["name"] for t in openai.get("tools", [])]
|
|
4468
|
+
self.assertNotIn("bash", remaining)
|
|
4469
|
+
self.assertIn("read", remaining)
|
|
4470
|
+
self.assertIn("write", remaining)
|
|
4471
|
+
finally:
|
|
4472
|
+
for k, v in old_vals.items():
|
|
4473
|
+
setattr(proxy, k, v)
|
|
4474
|
+
|
|
4475
|
+
def test_exclusion_cleared_on_stagnation_clear(self):
|
|
4476
|
+
"""Option 1: cycling exclusion is lifted when stagnation clears in review."""
|
|
4477
|
+
monitor = proxy.SessionMonitor(context_window=262144)
|
|
4478
|
+
monitor.tool_turn_phase = "review"
|
|
4479
|
+
monitor.tool_state_review_cycles = 1
|
|
4480
|
+
monitor.tool_state_stagnation_streak = 0 # stagnation cleared
|
|
4481
|
+
monitor.cycling_tool_names = ["bash"]
|
|
4482
|
+
monitor.tool_state_auto_budget_remaining = 0
|
|
4483
|
+
monitor.tool_state_forced_budget_remaining = 6
|
|
4484
|
+
|
|
4485
|
+
# This should transition review→act and clear cycling names
|
|
4486
|
+
old_vals = {}
|
|
4487
|
+
for k in ["PROXY_TOOL_STATE_MACHINE", "PROXY_TOOL_STATE_MIN_MESSAGES",
|
|
4488
|
+
"PROXY_TOOL_STATE_FORCED_BUDGET"]:
|
|
4489
|
+
old_vals[k] = getattr(proxy, k)
|
|
4490
|
+
try:
|
|
4491
|
+
setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
|
|
4492
|
+
setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
|
|
4493
|
+
setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 6)
|
|
4494
|
+
|
|
4495
|
+
body = self._make_body_with_tools(["bash", "read", "write"])
|
|
4496
|
+
proxy.build_openai_request(body, monitor)
|
|
4497
|
+
|
|
4498
|
+
self.assertEqual(monitor.tool_turn_phase, "act")
|
|
4499
|
+
self.assertEqual(monitor.cycling_tool_names, [])
|
|
4500
|
+
finally:
|
|
4501
|
+
for k, v in old_vals.items():
|
|
4502
|
+
setattr(proxy, k, v)
|
|
4503
|
+
|
|
4504
|
+
def test_escalated_hint_on_cycle_2(self):
|
|
4505
|
+
"""Option 3: cycle 2+ gets escalated CRITICAL hint text."""
|
|
4506
|
+
old_vals = {}
|
|
4507
|
+
for k in ["PROXY_TOOL_STATE_MACHINE", "PROXY_TOOL_STATE_MIN_MESSAGES",
|
|
4508
|
+
"PROXY_TOOL_STATE_FORCED_BUDGET", "PROXY_TOOL_STATE_CYCLE_WINDOW",
|
|
4509
|
+
"PROXY_TOOL_STATE_STAGNATION_THRESHOLD"]:
|
|
4510
|
+
old_vals[k] = getattr(proxy, k)
|
|
4511
|
+
try:
|
|
4512
|
+
setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
|
|
4513
|
+
setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
|
|
4514
|
+
setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 20)
|
|
4515
|
+
setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 3)
|
|
4516
|
+
setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 2)
|
|
4517
|
+
|
|
4518
|
+
all_tools = ["bash", "read", "write"]
|
|
4519
|
+
body = self._make_body_with_tools(all_tools)
|
|
4520
|
+
monitor = proxy.SessionMonitor(context_window=262144)
|
|
4521
|
+
# Pre-set as if we've already been through 1 review cycle
|
|
4522
|
+
monitor.tool_turn_phase = "act"
|
|
4523
|
+
monitor.tool_state_review_cycles = 1
|
|
4524
|
+
monitor.tool_state_forced_budget_remaining = 20
|
|
4525
|
+
monitor.tool_state_stagnation_streak = 3
|
|
4526
|
+
fp = "bash:781c24ad"
|
|
4527
|
+
monitor.tool_call_history = [fp, fp, fp]
|
|
4528
|
+
monitor.last_tool_fingerprint = fp
|
|
4529
|
+
|
|
4530
|
+
openai = proxy.build_openai_request(body, monitor)
|
|
4531
|
+
|
|
4532
|
+
# Should now be in review with cycles=2 and escalated hint
|
|
4533
|
+
self.assertEqual(monitor.tool_turn_phase, "review")
|
|
4534
|
+
self.assertEqual(monitor.tool_state_review_cycles, 2)
|
|
4535
|
+
messages = openai.get("messages", [])
|
|
4536
|
+
last_user = [m for m in messages if m.get("role") == "user"][-1]
|
|
4537
|
+
self.assertIn("CRITICAL", last_user["content"])
|
|
4538
|
+
self.assertIn("2 review rounds", last_user["content"])
|
|
4539
|
+
finally:
|
|
4540
|
+
for k, v in old_vals.items():
|
|
4541
|
+
setattr(proxy, k, v)
|
|
4542
|
+
|
|
4543
|
+
def test_mild_hint_on_cycle_1(self):
|
|
4544
|
+
"""Option 3: cycle 1 gets mild hint, not escalated."""
|
|
4545
|
+
old_vals = {}
|
|
4546
|
+
for k in ["PROXY_TOOL_STATE_MACHINE", "PROXY_TOOL_STATE_MIN_MESSAGES",
|
|
4547
|
+
"PROXY_TOOL_STATE_FORCED_BUDGET", "PROXY_TOOL_STATE_CYCLE_WINDOW",
|
|
4548
|
+
"PROXY_TOOL_STATE_STAGNATION_THRESHOLD"]:
|
|
4549
|
+
old_vals[k] = getattr(proxy, k)
|
|
4550
|
+
try:
|
|
4551
|
+
setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
|
|
4552
|
+
setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
|
|
4553
|
+
setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 20)
|
|
4554
|
+
setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 3)
|
|
4555
|
+
setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 2)
|
|
4556
|
+
|
|
4557
|
+
body = self._make_body_with_tools(["bash", "read", "write"])
|
|
4558
|
+
monitor = proxy.SessionMonitor(context_window=262144)
|
|
4559
|
+
monitor.tool_turn_phase = "act"
|
|
4560
|
+
monitor.tool_state_review_cycles = 0
|
|
4561
|
+
monitor.tool_state_forced_budget_remaining = 20
|
|
4562
|
+
monitor.tool_state_stagnation_streak = 3
|
|
4563
|
+
fp = "bash:781c24ad"
|
|
4564
|
+
monitor.tool_call_history = [fp, fp, fp]
|
|
4565
|
+
monitor.last_tool_fingerprint = fp
|
|
4566
|
+
|
|
4567
|
+
openai = proxy.build_openai_request(body, monitor)
|
|
4568
|
+
|
|
4569
|
+
self.assertEqual(monitor.tool_turn_phase, "review")
|
|
4570
|
+
self.assertEqual(monitor.tool_state_review_cycles, 1)
|
|
4571
|
+
messages = openai.get("messages", [])
|
|
4572
|
+
last_user = [m for m in messages if m.get("role") == "user"][-1]
|
|
4573
|
+
self.assertNotIn("CRITICAL", last_user["content"])
|
|
4574
|
+
self.assertIn("DIFFERENT tool", last_user["content"])
|
|
4575
|
+
finally:
|
|
4576
|
+
for k, v in old_vals.items():
|
|
4577
|
+
setattr(proxy, k, v)
|