@miller-tech/uap 1.20.25 → 1.20.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.20.25",
3
+ "version": "1.20.27",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -2281,11 +2281,13 @@ def _resolve_state_machine_tool_choice(
2281
2281
  1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
2282
2282
  )
2283
2283
  # If stagnation cleared during review, the model tried a
2284
- # different approach — reward by reducing cycle pressure.
2284
+ # different approach — reward by reducing cycle pressure and
2285
+ # lifting persistent tool exclusion.
2285
2286
  if monitor.tool_state_stagnation_streak == 0 and monitor.tool_state_review_cycles > 0:
2286
2287
  monitor.tool_state_review_cycles = max(0, monitor.tool_state_review_cycles - 1)
2288
+ monitor.cycling_tool_names = []
2287
2289
  logger.info(
2288
- "TOOL STATE MACHINE: review_cycles decremented to %d (stagnation cleared)",
2290
+ "TOOL STATE MACHINE: review_cycles decremented to %d, cycling exclusion lifted (stagnation cleared)",
2289
2291
  monitor.tool_state_review_cycles,
2290
2292
  )
2291
2293
  return "required", "review_complete"
@@ -2463,14 +2465,22 @@ def build_openai_request(
2463
2465
  openai_body["stop"] = anthropic_body["stop_sequences"]
2464
2466
 
2465
2467
  # Force controlled temperature for tool-call turns to reduce garbled output
2468
+ # Cycle 15 Option 2: use lower temperature after contamination resets
2466
2469
  if has_tools:
2467
2470
  client_temp = openai_body.get("temperature")
2468
- if client_temp is None or client_temp > PROXY_TOOL_TURN_TEMPERATURE:
2469
- openai_body["temperature"] = PROXY_TOOL_TURN_TEMPERATURE
2471
+ target_temp = PROXY_TOOL_TURN_TEMPERATURE
2472
+ if monitor.contamination_resets > 0:
2473
+ target_temp = min(target_temp, 0.1)
2474
+ if client_temp is None or client_temp > target_temp:
2475
+ openai_body["temperature"] = target_temp
2476
+ extra = ""
2477
+ if monitor.contamination_resets > 0:
2478
+ extra = f" (post-contamination reset, resets={monitor.contamination_resets})"
2470
2479
  logger.info(
2471
- "TOOL TURN TEMP: forcing temperature=%.2f (was %s) for tool-enabled request",
2472
- PROXY_TOOL_TURN_TEMPERATURE,
2480
+ "TOOL TURN TEMP: forcing temperature=%.2f (was %s) for tool-enabled request%s",
2481
+ target_temp,
2473
2482
  client_temp,
2483
+ extra,
2474
2484
  )
2475
2485
 
2476
2486
  # Convert Anthropic tools to OpenAI function-calling tools
@@ -2589,31 +2599,41 @@ def build_openai_request(
2589
2599
  monitor.no_progress_streak = (
2590
2600
  0 if last_user_has_tool_result else monitor.no_progress_streak + 1
2591
2601
  )
2592
- # Option 1: Inject cycle-break instruction when entering review
2602
+ # Inject cycle-break instruction when entering review
2603
+ # Option 3 (Cycle 14): Escalate hint text based on review cycle count
2593
2604
  if (
2594
2605
  monitor.tool_turn_phase == "review"
2595
2606
  and state_reason in {"cycle_detected", "stagnation"}
2596
2607
  and monitor.cycling_tool_names
2597
2608
  ):
2598
2609
  cycling_names = ", ".join(monitor.cycling_tool_names)
2599
- cycle_hint = (
2600
- f"You have been repeatedly calling the same tool(s): {cycling_names}. "
2601
- "This is not making progress. Use a DIFFERENT tool to advance the task, "
2602
- "or call a tool that produces your final answer."
2603
- )
2610
+ cycles = monitor.tool_state_review_cycles
2611
+ if cycles <= 1:
2612
+ cycle_hint = (
2613
+ f"You have been repeatedly calling the same tool(s): {cycling_names}. "
2614
+ "This is not making progress. Use a DIFFERENT tool to advance the task, "
2615
+ "or call a tool that produces your final answer."
2616
+ )
2617
+ else:
2618
+ cycle_hint = (
2619
+ f"CRITICAL: You have cycled {cycling_names} for {cycles} review rounds without progress. "
2620
+ "State what you have accomplished so far and what the next DIFFERENT action should be. "
2621
+ "Do NOT call the same tool again. Choose a completely different approach or "
2622
+ "produce your final answer now."
2623
+ )
2604
2624
  messages = openai_body.get("messages", [])
2605
2625
  messages.append({"role": "user", "content": cycle_hint})
2606
2626
  openai_body["messages"] = messages
2607
2627
  logger.warning(
2608
- "CYCLE BREAK: injected hint about cycling tools: %s",
2628
+ "CYCLE BREAK: injected hint about cycling tools: %s (escalation=%d)",
2609
2629
  cycling_names,
2630
+ cycles,
2610
2631
  )
2611
- # Option 2: Narrow tools during review to exclude cycling tools
2612
- # Option 1 enhancement: if any cycling tool is read-only, exclude
2613
- # the entire read-only class to prevent tool-hopping (read→glob→grep)
2632
+ # Narrow tools to exclude cycling tools
2633
+ # Option 1 (Cycle 13): if any cycling tool is read-only, exclude entire class
2634
+ # Option 1 (Cycle 14): persist exclusion during act phase too, not just review
2614
2635
  if (
2615
- monitor.tool_turn_phase == "review"
2616
- and monitor.cycling_tool_names
2636
+ monitor.cycling_tool_names
2617
2637
  and "tools" in openai_body
2618
2638
  ):
2619
2639
  exclude_set = set(monitor.cycling_tool_names)
@@ -4679,7 +4699,7 @@ async def _apply_malformed_tool_guardrail(
4679
4699
 
4680
4700
  attempts = max(0, PROXY_MALFORMED_TOOL_RETRY_MAX)
4681
4701
  current_issue = issue
4682
- # Track failing tool names for Option 3 (tool narrowing on retry)
4702
+ # Track failing tool names for tool narrowing on retry
4683
4703
  failing_tools: set[str] = set()
4684
4704
  if issue.kind == "invalid_tool_args":
4685
4705
  for tc in (working_resp.get("choices", [{}])[0].get("message", {}).get("tool_calls", [])):
@@ -4687,14 +4707,22 @@ async def _apply_malformed_tool_guardrail(
4687
4707
  raw_args = tc.get("function", {}).get("arguments", "")
4688
4708
  if fn_name and raw_args and _is_garbled_tool_arguments(raw_args):
4689
4709
  failing_tools.add(fn_name)
4710
+ # Cycle 15 Option 1: For malformed_payload retries, exclude complex
4711
+ # multi-field tools (task, Agent) that are prone to garbled generation
4712
+ # after the first retry fails.
4713
+ _COMPLEX_TOOLS_TO_EXCLUDE_ON_MALFORMED = {"task", "Agent"}
4714
+ malformed_exclude_active = False
4690
4715
  for attempt in range(attempts):
4691
4716
  attempt_tool_choice = _retry_tool_choice_for_attempt(
4692
4717
  required_tool_choice,
4693
4718
  attempt,
4694
4719
  attempts,
4695
4720
  )
4696
- # Option 3: On attempt >= 2, exclude consistently failing tools
4697
- exclude = list(failing_tools) if attempt >= 1 and failing_tools else None
4721
+ # On attempt >= 1, exclude consistently failing tools OR complex tools for malformed
4722
+ exclude_set = set(failing_tools) if failing_tools else set()
4723
+ if malformed_exclude_active:
4724
+ exclude_set |= _COMPLEX_TOOLS_TO_EXCLUDE_ON_MALFORMED
4725
+ exclude = list(exclude_set) if (attempt >= 1 and exclude_set) else None
4698
4726
  retry_body = _build_malformed_retry_body(
4699
4727
  openai_body,
4700
4728
  anthropic_body,
@@ -4773,6 +4801,8 @@ async def _apply_malformed_tool_guardrail(
4773
4801
 
4774
4802
  if retry_issue.kind == "malformed_payload":
4775
4803
  monitor.malformed_tool_streak += 1
4804
+ # Cycle 15 Option 1: activate complex tool exclusion for next retry
4805
+ malformed_exclude_active = True
4776
4806
  elif retry_issue.kind == "invalid_tool_args":
4777
4807
  monitor.invalid_tool_call_streak += 1
4778
4808
  monitor.arg_preflight_rejections += 1
@@ -4886,6 +4916,35 @@ def _maybe_apply_session_contamination_breaker(
4886
4916
  if not should_reset:
4887
4917
  return anthropic_body
4888
4918
 
4919
+ # Cycle 15 Option 3: if contamination has already reset N+ times in this
4920
+ # session, the model is fundamentally unable to produce valid tool calls.
4921
+ # Force finalize so the Droid framework can intervene.
4922
+ max_contamination_resets = 3
4923
+ if monitor.contamination_resets >= max_contamination_resets:
4924
+ logger.error(
4925
+ "SESSION CONTAMINATION LOOP: session=%s contamination_resets=%d >= %d, forcing finalize",
4926
+ session_id,
4927
+ monitor.contamination_resets,
4928
+ max_contamination_resets,
4929
+ )
4930
+ monitor.set_tool_turn_phase("finalize", reason="contamination_loop")
4931
+ monitor.contamination_resets += 1
4932
+ monitor.malformed_tool_streak = 0
4933
+ monitor.invalid_tool_call_streak = 0
4934
+ # Remove tools to force text-only response
4935
+ updated = dict(anthropic_body)
4936
+ updated.pop("tools", None)
4937
+ updated.pop("tool_choice", None)
4938
+ msgs = updated.get("messages", [])
4939
+ msgs.append({
4940
+ "role": "user",
4941
+ "content": (
4942
+ "Tool-call generation has failed repeatedly. Respond with plain text only. "
4943
+ "Summarize what you have accomplished and what remains to be done."
4944
+ ),
4945
+ })
4946
+ return updated
4947
+
4889
4948
  messages = anthropic_body.get("messages", [])
4890
4949
  keep_last = max(2, PROXY_SESSION_CONTAMINATION_KEEP_LAST)
4891
4950
  if len(messages) <= keep_last + 1:
@@ -4406,3 +4406,259 @@ class TestReadOnlyCycleClassExclusion(unittest.TestCase):
4406
4406
 
4407
4407
  dup, _ = monitor.has_duplicate_read_target(threshold=3)
4408
4408
  self.assertFalse(dup)
4409
+
4410
+
4411
+ class TestPersistentCycleExclusion(unittest.TestCase):
4412
+ """Tests for Cycle 14: persistent exclusion, escalating hints, and
4413
+ exclusion across review→act transitions."""
4414
+
4415
+ def _make_body_with_tools(self, tool_names, active_tool="bash", active_input=None):
4416
+ tools = [
4417
+ {"name": n, "description": f"{n} tool", "input_schema": {"type": "object"}}
4418
+ for n in tool_names
4419
+ ]
4420
+ inp = active_input or {"command": "ls"}
4421
+ return {
4422
+ "model": "test",
4423
+ "messages": [
4424
+ {"role": "user", "content": "do something"},
4425
+ {
4426
+ "role": "assistant",
4427
+ "content": [
4428
+ {"type": "tool_use", "id": "t1", "name": active_tool, "input": inp}
4429
+ ],
4430
+ },
4431
+ {
4432
+ "role": "user",
4433
+ "content": [
4434
+ {"type": "tool_result", "tool_use_id": "t1", "content": "ok"}
4435
+ ],
4436
+ },
4437
+ ],
4438
+ "tools": tools,
4439
+ }
4440
+
4441
+ def test_exclusion_persists_through_act_phase(self):
4442
+ """Option 1: cycling_tool_names exclusion persists in act phase after review."""
4443
+ old_vals = {}
4444
+ for k in ["PROXY_TOOL_STATE_MACHINE", "PROXY_TOOL_STATE_MIN_MESSAGES",
4445
+ "PROXY_TOOL_STATE_FORCED_BUDGET", "PROXY_TOOL_STATE_CYCLE_WINDOW",
4446
+ "PROXY_TOOL_STATE_STAGNATION_THRESHOLD"]:
4447
+ old_vals[k] = getattr(proxy, k)
4448
+ try:
4449
+ setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
4450
+ setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
4451
+ setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 6)
4452
+ setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 3)
4453
+ setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 2)
4454
+
4455
+ all_tools = ["bash", "read", "write", "edit"]
4456
+ body = self._make_body_with_tools(all_tools)
4457
+ monitor = proxy.SessionMonitor(context_window=262144)
4458
+
4459
+ # Simulate bash cycling that triggers review
4460
+ monitor.cycling_tool_names = ["bash"]
4461
+ monitor.tool_turn_phase = "act"
4462
+ monitor.tool_state_forced_budget_remaining = 5
4463
+
4464
+ openai = proxy.build_openai_request(body, monitor)
4465
+
4466
+ # In act phase with cycling_tool_names set, bash should be excluded
4467
+ remaining = [t["function"]["name"] for t in openai.get("tools", [])]
4468
+ self.assertNotIn("bash", remaining)
4469
+ self.assertIn("read", remaining)
4470
+ self.assertIn("write", remaining)
4471
+ finally:
4472
+ for k, v in old_vals.items():
4473
+ setattr(proxy, k, v)
4474
+
4475
+ def test_exclusion_cleared_on_stagnation_clear(self):
4476
+ """Option 1: cycling exclusion is lifted when stagnation clears in review."""
4477
+ monitor = proxy.SessionMonitor(context_window=262144)
4478
+ monitor.tool_turn_phase = "review"
4479
+ monitor.tool_state_review_cycles = 1
4480
+ monitor.tool_state_stagnation_streak = 0 # stagnation cleared
4481
+ monitor.cycling_tool_names = ["bash"]
4482
+ monitor.tool_state_auto_budget_remaining = 0
4483
+ monitor.tool_state_forced_budget_remaining = 6
4484
+
4485
+ # This should transition review→act and clear cycling names
4486
+ old_vals = {}
4487
+ for k in ["PROXY_TOOL_STATE_MACHINE", "PROXY_TOOL_STATE_MIN_MESSAGES",
4488
+ "PROXY_TOOL_STATE_FORCED_BUDGET"]:
4489
+ old_vals[k] = getattr(proxy, k)
4490
+ try:
4491
+ setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
4492
+ setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
4493
+ setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 6)
4494
+
4495
+ body = self._make_body_with_tools(["bash", "read", "write"])
4496
+ proxy.build_openai_request(body, monitor)
4497
+
4498
+ self.assertEqual(monitor.tool_turn_phase, "act")
4499
+ self.assertEqual(monitor.cycling_tool_names, [])
4500
+ finally:
4501
+ for k, v in old_vals.items():
4502
+ setattr(proxy, k, v)
4503
+
4504
+ def test_escalated_hint_on_cycle_2(self):
4505
+ """Option 3: cycle 2+ gets escalated CRITICAL hint text."""
4506
+ old_vals = {}
4507
+ for k in ["PROXY_TOOL_STATE_MACHINE", "PROXY_TOOL_STATE_MIN_MESSAGES",
4508
+ "PROXY_TOOL_STATE_FORCED_BUDGET", "PROXY_TOOL_STATE_CYCLE_WINDOW",
4509
+ "PROXY_TOOL_STATE_STAGNATION_THRESHOLD"]:
4510
+ old_vals[k] = getattr(proxy, k)
4511
+ try:
4512
+ setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
4513
+ setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
4514
+ setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 20)
4515
+ setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 3)
4516
+ setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 2)
4517
+
4518
+ all_tools = ["bash", "read", "write"]
4519
+ body = self._make_body_with_tools(all_tools)
4520
+ monitor = proxy.SessionMonitor(context_window=262144)
4521
+ # Pre-set as if we've already been through 1 review cycle
4522
+ monitor.tool_turn_phase = "act"
4523
+ monitor.tool_state_review_cycles = 1
4524
+ monitor.tool_state_forced_budget_remaining = 20
4525
+ monitor.tool_state_stagnation_streak = 3
4526
+ fp = "bash:781c24ad"
4527
+ monitor.tool_call_history = [fp, fp, fp]
4528
+ monitor.last_tool_fingerprint = fp
4529
+
4530
+ openai = proxy.build_openai_request(body, monitor)
4531
+
4532
+ # Should now be in review with cycles=2 and escalated hint
4533
+ self.assertEqual(monitor.tool_turn_phase, "review")
4534
+ self.assertEqual(monitor.tool_state_review_cycles, 2)
4535
+ messages = openai.get("messages", [])
4536
+ last_user = [m for m in messages if m.get("role") == "user"][-1]
4537
+ self.assertIn("CRITICAL", last_user["content"])
4538
+ self.assertIn("2 review rounds", last_user["content"])
4539
+ finally:
4540
+ for k, v in old_vals.items():
4541
+ setattr(proxy, k, v)
4542
+
4543
+ def test_mild_hint_on_cycle_1(self):
4544
+ """Option 3: cycle 1 gets mild hint, not escalated."""
4545
+ old_vals = {}
4546
+ for k in ["PROXY_TOOL_STATE_MACHINE", "PROXY_TOOL_STATE_MIN_MESSAGES",
4547
+ "PROXY_TOOL_STATE_FORCED_BUDGET", "PROXY_TOOL_STATE_CYCLE_WINDOW",
4548
+ "PROXY_TOOL_STATE_STAGNATION_THRESHOLD"]:
4549
+ old_vals[k] = getattr(proxy, k)
4550
+ try:
4551
+ setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
4552
+ setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
4553
+ setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 20)
4554
+ setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 3)
4555
+ setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 2)
4556
+
4557
+ body = self._make_body_with_tools(["bash", "read", "write"])
4558
+ monitor = proxy.SessionMonitor(context_window=262144)
4559
+ monitor.tool_turn_phase = "act"
4560
+ monitor.tool_state_review_cycles = 0
4561
+ monitor.tool_state_forced_budget_remaining = 20
4562
+ monitor.tool_state_stagnation_streak = 3
4563
+ fp = "bash:781c24ad"
4564
+ monitor.tool_call_history = [fp, fp, fp]
4565
+ monitor.last_tool_fingerprint = fp
4566
+
4567
+ openai = proxy.build_openai_request(body, monitor)
4568
+
4569
+ self.assertEqual(monitor.tool_turn_phase, "review")
4570
+ self.assertEqual(monitor.tool_state_review_cycles, 1)
4571
+ messages = openai.get("messages", [])
4572
+ last_user = [m for m in messages if m.get("role") == "user"][-1]
4573
+ self.assertNotIn("CRITICAL", last_user["content"])
4574
+ self.assertIn("DIFFERENT tool", last_user["content"])
4575
+ finally:
4576
+ for k, v in old_vals.items():
4577
+ setattr(proxy, k, v)
4578
+
4579
+
4580
+ class TestMalformedPayloadLoopFix(unittest.TestCase):
4581
+ """Tests for Cycle 15: malformed payload loop breaking."""
4582
+
4583
+ def test_contamination_loop_forces_finalize(self):
4584
+ """Option 3: after 3+ contamination resets, force finalize."""
4585
+ monitor = proxy.SessionMonitor(context_window=262144)
4586
+ monitor.contamination_resets = 3 # already hit 3 resets
4587
+ monitor.malformed_tool_streak = 3 # triggers should_reset
4588
+
4589
+ body = {
4590
+ "model": "test",
4591
+ "messages": [
4592
+ {"role": "user", "content": "do something"},
4593
+ {"role": "assistant", "content": "ok"},
4594
+ {"role": "user", "content": "continue"},
4595
+ ],
4596
+ "tools": [
4597
+ {"name": "bash", "description": "Run", "input_schema": {"type": "object"}},
4598
+ ],
4599
+ }
4600
+ result = proxy._maybe_apply_session_contamination_breaker(
4601
+ body, monitor, "test-session"
4602
+ )
4603
+ # Should have removed tools and forced finalize
4604
+ self.assertNotIn("tools", result)
4605
+ self.assertNotIn("tool_choice", result)
4606
+ self.assertEqual(monitor.tool_turn_phase, "finalize")
4607
+ # Check finalize instruction was injected
4608
+ last_msg = result["messages"][-1]
4609
+ self.assertIn("plain text only", last_msg["content"])
4610
+
4611
+ def test_contamination_below_threshold_resets_normally(self):
4612
+ """Below 3 contamination resets, normal reset behavior."""
4613
+ monitor = proxy.SessionMonitor(context_window=262144)
4614
+ monitor.contamination_resets = 1
4615
+ monitor.malformed_tool_streak = 3
4616
+
4617
+ # Need enough messages (> keep_last + 1) for full reset path
4618
+ msgs = [{"role": "user", "content": "start"}]
4619
+ for i in range(20):
4620
+ msgs.append({"role": "assistant", "content": f"resp {i}"})
4621
+ msgs.append({"role": "user", "content": f"msg {i}"})
4622
+ body = {
4623
+ "model": "test",
4624
+ "messages": msgs,
4625
+ "tools": [
4626
+ {"name": "bash", "description": "Run", "input_schema": {"type": "object"}},
4627
+ ],
4628
+ }
4629
+ result = proxy._maybe_apply_session_contamination_breaker(
4630
+ body, monitor, "test-session"
4631
+ )
4632
+ # Should have done normal reset (increment contamination_resets)
4633
+ self.assertEqual(monitor.contamination_resets, 2)
4634
+ self.assertEqual(monitor.tool_turn_phase, "bootstrap")
4635
+
4636
+ def test_post_contamination_temp_lowered(self):
4637
+ """Option 2: temperature lowered to 0.1 after contamination reset."""
4638
+ monitor = proxy.SessionMonitor(context_window=262144)
4639
+ monitor.contamination_resets = 1 # has had a reset
4640
+
4641
+ body = {
4642
+ "model": "test",
4643
+ "messages": [{"role": "user", "content": "test"}],
4644
+ "tools": [
4645
+ {"name": "bash", "description": "Run", "input_schema": {"type": "object"}},
4646
+ ],
4647
+ }
4648
+ openai = proxy.build_openai_request(body, monitor)
4649
+ self.assertLessEqual(openai.get("temperature", 1.0), 0.1)
4650
+
4651
+ def test_normal_temp_without_contamination(self):
4652
+ """Without contamination resets, normal tool temp (0.3) is used."""
4653
+ monitor = proxy.SessionMonitor(context_window=262144)
4654
+ monitor.contamination_resets = 0
4655
+
4656
+ body = {
4657
+ "model": "test",
4658
+ "messages": [{"role": "user", "content": "test"}],
4659
+ "tools": [
4660
+ {"name": "bash", "description": "Run", "input_schema": {"type": "object"}},
4661
+ ],
4662
+ }
4663
+ openai = proxy.build_openai_request(body, monitor)
4664
+ self.assertAlmostEqual(openai.get("temperature", 1.0), 0.3, places=1)