@miller-tech/uap 1.20.24 → 1.20.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.20.24",
3
+ "version": "1.20.26",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -155,16 +155,16 @@ PROXY_TOOL_STATE_FORCED_BUDGET = int(
155
155
  )
156
156
  PROXY_TOOL_STATE_AUTO_BUDGET = int(os.environ.get("PROXY_TOOL_STATE_AUTO_BUDGET", "2"))
157
157
  PROXY_TOOL_STATE_STAGNATION_THRESHOLD = int(
158
- os.environ.get("PROXY_TOOL_STATE_STAGNATION_THRESHOLD", "5")
158
+ os.environ.get("PROXY_TOOL_STATE_STAGNATION_THRESHOLD", "8")
159
159
  )
160
160
  PROXY_TOOL_STATE_CYCLE_WINDOW = int(
161
- os.environ.get("PROXY_TOOL_STATE_CYCLE_WINDOW", "4")
161
+ os.environ.get("PROXY_TOOL_STATE_CYCLE_WINDOW", "3")
162
162
  )
163
163
  PROXY_TOOL_STATE_FINALIZE_THRESHOLD = int(
164
164
  os.environ.get("PROXY_TOOL_STATE_FINALIZE_THRESHOLD", "18")
165
165
  )
166
166
  PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT = int(
167
- os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "1")
167
+ os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "3")
168
168
  )
169
169
  PROXY_COMPLETION_RECOVERY_MAX = int(
170
170
  os.environ.get("PROXY_COMPLETION_RECOVERY_MAX", "3")
@@ -189,6 +189,12 @@ PROXY_TOOL_NARROWING_EXPAND_ON_LOOP = os.environ.get(
189
189
  "off",
190
190
  "no",
191
191
  }
192
+ # Read-only tools that should be excluded as a class when any one cycles
193
+ _READ_ONLY_TOOL_CLASS = frozenset({
194
+ "read", "glob", "grep", "Read", "Glob", "Grep",
195
+ "search", "Search", "list_files", "ListFiles",
196
+ })
197
+
192
198
  PROXY_GUARDRAIL_RETRY = os.environ.get("PROXY_GUARDRAIL_RETRY", "on").lower() not in {
193
199
  "0",
194
200
  "false",
@@ -196,6 +202,9 @@ PROXY_GUARDRAIL_RETRY = os.environ.get("PROXY_GUARDRAIL_RETRY", "on").lower() no
196
202
  "no",
197
203
  }
198
204
  PROXY_SESSION_TTL_SECS = int(os.environ.get("PROXY_SESSION_TTL_SECS", "7200"))
205
+ PROXY_FINALIZE_CONTINUATION_MAX = int(
206
+ os.environ.get("PROXY_FINALIZE_CONTINUATION_MAX", "3")
207
+ )
199
208
  PROXY_STREAM_REASONING_FALLBACK = (
200
209
  os.environ.get("PROXY_STREAM_REASONING_FALLBACK", "off").strip().lower()
201
210
  )
@@ -621,6 +630,9 @@ class SessionMonitor:
621
630
  tool_call_history: list = field(
622
631
  default_factory=list
623
632
  ) # Recent tool call fingerprints
633
+ tool_target_history: dict = field(
634
+ default_factory=dict
635
+ ) # {tool_name: {target: count}} for read-only dedup
624
636
  consecutive_forced_count: int = (
625
637
  0 # How many times tool_choice was forced consecutively
626
638
  )
@@ -646,6 +658,8 @@ class SessionMonitor:
646
658
  cycling_tool_names: list = field(default_factory=list)
647
659
  last_response_garbled: bool = False # previous turn had garbled/malformed output
648
660
  finalize_turn_active: bool = False
661
+ finalize_continuation_count: int = 0
662
+ finalize_synthetic_tool_id: str = ""
649
663
  completion_required: bool = False
650
664
  completion_pending: bool = False
651
665
  completion_verified: bool = False
@@ -753,14 +767,47 @@ class SessionMonitor:
753
767
 
754
768
  # --- Token Loop Protection Methods ---
755
769
 
756
- def record_tool_calls(self, tool_names: list[str]):
757
- """Record tool call names for loop detection."""
758
- fingerprint = "|".join(sorted(tool_names)) if tool_names else ""
759
- self.tool_call_history.append(fingerprint)
770
+ def record_tool_calls(
771
+ self,
772
+ tool_names: list[str],
773
+ tool_targets: dict[str, str] | None = None,
774
+ fingerprint: str = "",
775
+ ):
776
+ """Record tool call names for loop detection.
777
+
778
+ tool_targets: optional {tool_name: target_key} for read-only dedup.
779
+ e.g. {"read": "/path/to/file", "glob": "**/*.ts"}
780
+ If a pre-computed fingerprint (with argument hashes) is provided,
781
+ use it directly. Otherwise fall back to name-only fingerprint.
782
+ """
783
+ fp = fingerprint or ("|".join(sorted(tool_names)) if tool_names else "")
784
+ self.tool_call_history.append(fp)
760
785
  # Keep last 30 entries
761
786
  if len(self.tool_call_history) > 30:
762
787
  self.tool_call_history = self.tool_call_history[-30:]
763
788
 
789
+ # Track read-only tool targets for dedup (Option 3)
790
+ if tool_targets:
791
+ for name, target in tool_targets.items():
792
+ if name.lower() in {n.lower() for n in _READ_ONLY_TOOL_CLASS} and target:
793
+ by_tool = self.tool_target_history.setdefault(name, {})
794
+ by_tool[target] = by_tool.get(target, 0) + 1
795
+
796
def has_duplicate_read_target(self, threshold: int = 2) -> tuple[bool, str]:
    """Report whether some tool has hit one target at least ``threshold`` times.

    Scans ``self.tool_target_history`` (``{tool_name: {target: count}}``) in
    insertion order.

    Returns:
        ``(True, tool_name)`` for the first tool with any target whose count
        reaches ``threshold``; ``(False, "")`` when no tool qualifies.
    """
    for name, hits in self.tool_target_history.items():
        # The target key itself is irrelevant here; only the counts matter.
        if any(seen >= threshold for seen in hits.values()):
            return True, name
    return False, ""
806
+
807
def reset_tool_targets(self):
    """Drop all recorded per-tool target counts.

    Meant to be invoked on a phase reset or when fresh user text arrives.
    """
    # Rebind to a brand-new dict (rather than clearing in place) so any
    # outside reference to the previous history is left untouched.
    self.tool_target_history = dict()
810
+
764
811
  def detect_tool_loop(self, window: int = 6) -> tuple[bool, int]:
765
812
  """Detect if the model is stuck in a tool call loop.
766
813
 
@@ -851,6 +898,7 @@ class SessionMonitor:
851
898
  self.tool_state_review_cycles = 0
852
899
  self.cycling_tool_names = []
853
900
  self.last_tool_fingerprint = ""
901
+ self.reset_tool_targets()
854
902
 
855
903
  def update_completion_state(self, anthropic_body: dict, has_tool_results: bool):
856
904
  self.completion_required = _should_enforce_completion_contract(anthropic_body)
@@ -2095,6 +2143,8 @@ def _resolve_state_machine_tool_choice(
2095
2143
  monitor.invalid_tool_call_streak = 0
2096
2144
  monitor.required_tool_miss_streak = 0
2097
2145
  monitor.reset_tool_turn_state(reason="fresh_user_text")
2146
+ monitor.finalize_continuation_count = 0
2147
+ monitor.finalize_synthetic_tool_id = ""
2098
2148
  return None, "fresh_user_text"
2099
2149
 
2100
2150
  active_loop = (
@@ -2113,6 +2163,8 @@ def _resolve_state_machine_tool_choice(
2113
2163
  monitor.invalid_tool_call_streak = 0
2114
2164
  monitor.required_tool_miss_streak = 0
2115
2165
  monitor.reset_tool_turn_state(reason="inactive_loop")
2166
+ monitor.finalize_continuation_count = 0
2167
+ monitor.finalize_synthetic_tool_id = ""
2116
2168
  return None, "inactive_loop"
2117
2169
 
2118
2170
  if monitor.tool_turn_phase == "bootstrap":
@@ -2158,6 +2210,16 @@ def _resolve_state_machine_tool_choice(
2158
2210
  return "finalize", "review_cycle_limit"
2159
2211
 
2160
2212
  if monitor.tool_turn_phase == "act":
2213
+ # Option 3: Early cycle break when same read target is hit 3+ times
2214
+ dup_target, dup_tool = monitor.has_duplicate_read_target(threshold=3)
2215
+ if dup_target and not cycle_looping and not stagnating:
2216
+ cycle_looping = True
2217
+ cycle_repeat = 2
2218
+ logger.warning(
2219
+ "TOOL STATE MACHINE: duplicate read target detected for '%s', triggering early cycle break",
2220
+ dup_tool,
2221
+ )
2222
+
2161
2223
  if cycle_looping or stagnating:
2162
2224
  reason = "cycle_detected" if cycle_looping else "stagnation"
2163
2225
  monitor.set_tool_turn_phase("review", reason=reason)
@@ -2169,9 +2231,15 @@ def _resolve_state_machine_tool_choice(
2169
2231
  1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
2170
2232
  )
2171
2233
  # Capture which tools are cycling for narrowing/hint injection
2234
+ # Strip argument hashes (e.g. "glob:abc12345" -> "glob") so that
2235
+ # tool narrowing can match against actual tool names.
2172
2236
  window = max(2, PROXY_TOOL_STATE_CYCLE_WINDOW)
2173
2237
  recent = [fp for fp in monitor.tool_call_history[-window:] if fp]
2174
- monitor.cycling_tool_names = list(dict.fromkeys(recent))
2238
+ raw_names = []
2239
+ for fp in recent:
2240
+ for part in fp.split("|"):
2241
+ raw_names.append(part.split(":")[0])
2242
+ monitor.cycling_tool_names = list(dict.fromkeys(raw_names))
2175
2243
  logger.warning(
2176
2244
  "TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d cycling_tools=%s)",
2177
2245
  cycle_looping,
@@ -2184,7 +2252,11 @@ def _resolve_state_machine_tool_choice(
2184
2252
 
2185
2253
  if monitor.tool_state_forced_budget_remaining <= 0:
2186
2254
  monitor.set_tool_turn_phase("review", reason="forced_budget_exhausted")
2187
- monitor.tool_state_review_cycles += 1
2255
+ # Only count toward review cycle limit if there was an actual
2256
+ # cycle/stagnation detected. Budget exhaustion alone means the
2257
+ # model is working — it just used all its turns — not cycling.
2258
+ if cycle_looping or stagnating:
2259
+ monitor.tool_state_review_cycles += 1
2188
2260
  monitor.tool_state_auto_budget_remaining = max(
2189
2261
  1, PROXY_TOOL_STATE_AUTO_BUDGET
2190
2262
  )
@@ -2192,8 +2264,10 @@ def _resolve_state_machine_tool_choice(
2192
2264
  1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
2193
2265
  )
2194
2266
  logger.warning(
2195
- "TOOL STATE MACHINE: forced budget exhausted, entering review (cycles=%d)",
2267
+ "TOOL STATE MACHINE: forced budget exhausted, entering review (cycles=%d cycling=%s stagnating=%s)",
2196
2268
  monitor.tool_state_review_cycles,
2269
+ cycle_looping,
2270
+ stagnating,
2197
2271
  )
2198
2272
  return "required", "forced_budget_exhausted"
2199
2273
 
@@ -2206,6 +2280,16 @@ def _resolve_state_machine_tool_choice(
2206
2280
  monitor.tool_state_forced_budget_remaining = max(
2207
2281
  1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
2208
2282
  )
2283
+ # If stagnation cleared during review, the model tried a
2284
+ # different approach — reward by reducing cycle pressure and
2285
+ # lifting persistent tool exclusion.
2286
+ if monitor.tool_state_stagnation_streak == 0 and monitor.tool_state_review_cycles > 0:
2287
+ monitor.tool_state_review_cycles = max(0, monitor.tool_state_review_cycles - 1)
2288
+ monitor.cycling_tool_names = []
2289
+ logger.info(
2290
+ "TOOL STATE MACHINE: review_cycles decremented to %d, cycling exclusion lifted (stagnation cleared)",
2291
+ monitor.tool_state_review_cycles,
2292
+ )
2209
2293
  return "required", "review_complete"
2210
2294
 
2211
2295
  monitor.tool_state_auto_budget_remaining -= 1
@@ -2416,6 +2500,9 @@ def build_openai_request(
2416
2500
  n_msgs = len(anthropic_body.get("messages", []))
2417
2501
  has_tool_results = _conversation_has_tool_results(anthropic_body)
2418
2502
 
2503
+ # Detect and strip synthetic finalize continuation before fingerprinting
2504
+ _detect_and_strip_synthetic_continuation(anthropic_body, monitor)
2505
+
2419
2506
  # Record tool calls from the last assistant message for loop detection
2420
2507
  latest_tool_fingerprint = _record_last_assistant_tool_calls(
2421
2508
  anthropic_body, monitor
@@ -2504,44 +2591,61 @@ def build_openai_request(
2504
2591
  monitor.no_progress_streak = (
2505
2592
  0 if last_user_has_tool_result else monitor.no_progress_streak + 1
2506
2593
  )
2507
- # Option 1: Inject cycle-break instruction when entering review
2594
+ # Inject cycle-break instruction when entering review
2595
+ # Option 3 (Cycle 14): Escalate hint text based on review cycle count
2508
2596
  if (
2509
2597
  monitor.tool_turn_phase == "review"
2510
2598
  and state_reason in {"cycle_detected", "stagnation"}
2511
2599
  and monitor.cycling_tool_names
2512
2600
  ):
2513
2601
  cycling_names = ", ".join(monitor.cycling_tool_names)
2514
- cycle_hint = (
2515
- f"You have been repeatedly calling the same tool(s): {cycling_names}. "
2516
- "This is not making progress. Use a DIFFERENT tool to advance the task, "
2517
- "or call a tool that produces your final answer."
2518
- )
2602
+ cycles = monitor.tool_state_review_cycles
2603
+ if cycles <= 1:
2604
+ cycle_hint = (
2605
+ f"You have been repeatedly calling the same tool(s): {cycling_names}. "
2606
+ "This is not making progress. Use a DIFFERENT tool to advance the task, "
2607
+ "or call a tool that produces your final answer."
2608
+ )
2609
+ else:
2610
+ cycle_hint = (
2611
+ f"CRITICAL: You have cycled {cycling_names} for {cycles} review rounds without progress. "
2612
+ "State what you have accomplished so far and what the next DIFFERENT action should be. "
2613
+ "Do NOT call the same tool again. Choose a completely different approach or "
2614
+ "produce your final answer now."
2615
+ )
2519
2616
  messages = openai_body.get("messages", [])
2520
2617
  messages.append({"role": "user", "content": cycle_hint})
2521
2618
  openai_body["messages"] = messages
2522
2619
  logger.warning(
2523
- "CYCLE BREAK: injected hint about cycling tools: %s",
2620
+ "CYCLE BREAK: injected hint about cycling tools: %s (escalation=%d)",
2524
2621
  cycling_names,
2622
+ cycles,
2525
2623
  )
2526
- # Option 2: Narrow tools during review to exclude cycling tools
2624
+ # Narrow tools to exclude cycling tools
2625
+ # Option 1 (Cycle 13): if any cycling tool is read-only, exclude entire class
2626
+ # Option 1 (Cycle 14): persist exclusion during act phase too, not just review
2527
2627
  if (
2528
- monitor.tool_turn_phase == "review"
2529
- and monitor.cycling_tool_names
2628
+ monitor.cycling_tool_names
2530
2629
  and "tools" in openai_body
2531
2630
  ):
2631
+ exclude_set = set(monitor.cycling_tool_names)
2632
+ # Expand to full read-only class if any cycling tool is read-only
2633
+ if any(n.lower() in {c.lower() for c in _READ_ONLY_TOOL_CLASS} for n in exclude_set):
2634
+ exclude_set |= _READ_ONLY_TOOL_CLASS
2532
2635
  original_count = len(openai_body["tools"])
2533
2636
  narrowed = [
2534
2637
  t
2535
2638
  for t in openai_body["tools"]
2536
- if t.get("function", {}).get("name") not in monitor.cycling_tool_names
2639
+ if t.get("function", {}).get("name") not in exclude_set
2537
2640
  ]
2538
2641
  if narrowed:
2539
2642
  openai_body["tools"] = narrowed
2540
2643
  logger.warning(
2541
- "CYCLE BREAK: narrowed tools from %d to %d (excluded %s)",
2644
+ "CYCLE BREAK: narrowed tools from %d to %d (excluded %s, read_only_class=%s)",
2542
2645
  original_count,
2543
2646
  len(narrowed),
2544
2647
  monitor.cycling_tool_names,
2648
+ any(n.lower() in {c.lower() for c in _READ_ONLY_TOOL_CLASS} for n in monitor.cycling_tool_names),
2545
2649
  )
2546
2650
  else:
2547
2651
  logger.warning(
@@ -2602,13 +2706,117 @@ def build_openai_request(
2602
2706
  return openai_body
2603
2707
 
2604
2708
 
2709
+ def _tool_call_fingerprint(block: dict) -> str:
2710
+ """Create a fingerprint for a tool call that includes both name and a
2711
+ short hash of the arguments. This prevents false cycle detection when
2712
+ the same tool is called with different arguments (e.g. reading different
2713
+ files)."""
2714
+ name = block.get("name", "unknown")
2715
+ inp = block.get("input")
2716
+ if inp:
2717
+ arg_str = json.dumps(inp, sort_keys=True, separators=(",", ":"))
2718
+ arg_hash = hashlib.md5(arg_str.encode()).hexdigest()[:8]
2719
+ return f"{name}:{arg_hash}"
2720
+ return name
2721
+
2722
+
2723
+ def _detect_and_strip_synthetic_continuation(
2724
+ anthropic_body: dict, monitor: SessionMonitor
2725
+ ) -> bool:
2726
+ """Detect if the latest messages contain a synthetic finalize continuation
2727
+ tool_use/tool_result pair. If found, strip them from the conversation and
2728
+ reset the state machine so the model gets a fresh act cycle.
2729
+
2730
+ Returns True if a synthetic continuation was detected and handled.
2731
+ """
2732
+ synthetic_id = monitor.finalize_synthetic_tool_id
2733
+ if not synthetic_id:
2734
+ return False
2735
+
2736
+ messages = anthropic_body.get("messages", [])
2737
+ if not messages:
2738
+ return False
2739
+
2740
+ # Walk backwards to find the synthetic tool_result in a user message
2741
+ found = False
2742
+ for msg in reversed(messages):
2743
+ if msg.get("role") != "user":
2744
+ continue
2745
+ content = msg.get("content")
2746
+ if not isinstance(content, list):
2747
+ break
2748
+ has_synthetic = any(
2749
+ isinstance(b, dict)
2750
+ and b.get("type") == "tool_result"
2751
+ and b.get("tool_use_id") == synthetic_id
2752
+ for b in content
2753
+ )
2754
+ if not has_synthetic:
2755
+ break
2756
+
2757
+ # Strip synthetic tool_result from user message
2758
+ new_content = [
2759
+ b for b in content
2760
+ if not (
2761
+ isinstance(b, dict)
2762
+ and b.get("type") == "tool_result"
2763
+ and b.get("tool_use_id") == synthetic_id
2764
+ )
2765
+ ]
2766
+ if not new_content:
2767
+ msg["content"] = [{"type": "text", "text": "Continue working on the task."}]
2768
+ else:
2769
+ msg["content"] = new_content
2770
+
2771
+ # Strip synthetic tool_use from the preceding assistant message
2772
+ for asst_msg in reversed(messages):
2773
+ if asst_msg.get("role") != "assistant":
2774
+ continue
2775
+ asst_content = asst_msg.get("content")
2776
+ if isinstance(asst_content, list):
2777
+ asst_msg["content"] = [
2778
+ b for b in asst_content
2779
+ if not (
2780
+ isinstance(b, dict)
2781
+ and b.get("type") == "tool_use"
2782
+ and b.get("id") == synthetic_id
2783
+ )
2784
+ ]
2785
+ break
2786
+
2787
+ found = True
2788
+ break
2789
+
2790
+ if not found:
2791
+ return False
2792
+
2793
+ # Reset state machine for fresh act cycle
2794
+ monitor.finalize_synthetic_tool_id = ""
2795
+ monitor.reset_tool_turn_state(reason="finalize_continuation_resume")
2796
+ monitor.reset_completion_recovery()
2797
+ monitor.tool_call_history = []
2798
+ logger.info(
2799
+ "FINALIZE CONTINUATION: stripped synthetic tool id=%s, "
2800
+ "reset state machine for fresh act cycle (continuations=%d/%d)",
2801
+ synthetic_id,
2802
+ monitor.finalize_continuation_count,
2803
+ PROXY_FINALIZE_CONTINUATION_MAX,
2804
+ )
2805
+ return True
2806
+
2807
+
2605
2808
  def _record_last_assistant_tool_calls(
2606
2809
  anthropic_body: dict, monitor: SessionMonitor
2607
2810
  ) -> str:
2608
2811
  """Extract tool call names from the last assistant message and record
2609
- them in the session monitor for loop detection."""
2812
+ them in the session monitor for loop detection.
2813
+
2814
+ Fingerprints now include an argument hash so that the same tool called
2815
+ with different arguments (e.g. read(file_a) vs read(file_b)) produces
2816
+ distinct fingerprints, preventing false cycle/stagnation detection."""
2610
2817
  messages = anthropic_body.get("messages", [])
2611
- tool_names = []
2818
+ tool_fingerprints = []
2819
+ tool_targets: dict[str, str] = {}
2612
2820
  for msg in reversed(messages):
2613
2821
  if msg.get("role") != "assistant":
2614
2822
  continue
@@ -2616,11 +2824,28 @@ def _record_last_assistant_tool_calls(
2616
2824
  if isinstance(content, list):
2617
2825
  for block in content:
2618
2826
  if isinstance(block, dict) and block.get("type") == "tool_use":
2619
- tool_names.append(block.get("name", "unknown"))
2827
+ tool_fingerprints.append(_tool_call_fingerprint(block))
2828
+ # Extract target key for read-only dedup (Option 3)
2829
+ name = block.get("name", "unknown")
2830
+ inp = block.get("input", {})
2831
+ if isinstance(inp, dict):
2832
+ target = (
2833
+ inp.get("file_path")
2834
+ or inp.get("path")
2835
+ or inp.get("pattern")
2836
+ or inp.get("command", "")[:80]
2837
+ )
2838
+ if target:
2839
+ tool_targets[name] = str(target)
2620
2840
  break
2621
- if tool_names:
2622
- monitor.record_tool_calls(tool_names)
2623
- return "|".join(sorted(tool_names))
2841
+ if tool_fingerprints:
2842
+ fingerprint = "|".join(sorted(tool_fingerprints))
2843
+ monitor.record_tool_calls(
2844
+ [fp.split(":")[0] for fp in tool_fingerprints],
2845
+ tool_targets=tool_targets,
2846
+ fingerprint=fingerprint,
2847
+ )
2848
+ return fingerprint
2624
2849
  return ""
2625
2850
 
2626
2851
 
@@ -4750,16 +4975,20 @@ def _maybe_extract_text_tool_calls(openai_resp: dict) -> dict:
4750
4975
  return openai_resp
4751
4976
 
4752
4977
 
4753
- def _detect_and_truncate_degenerate_repetition(openai_resp: dict) -> dict:
4978
+ def _detect_and_truncate_degenerate_repetition(
4979
+ openai_resp: dict,
4980
+ ) -> tuple[dict, bool]:
4754
4981
  """Detect degenerate repetitive text and truncate at first repetition.
4755
4982
 
4756
4983
  When the model produces highly repetitive output (e.g. the same 20+ char
4757
4984
  substring repeated 10+ times), truncate at the first repetition boundary
4758
4985
  and set finish_reason to stop.
4986
+
4987
+ Returns (response, was_degenerate) so the caller can retry if needed.
4759
4988
  """
4760
4989
  text = _openai_message_text(openai_resp)
4761
4990
  if not text or len(text) < 200:
4762
- return openai_resp
4991
+ return openai_resp, False
4763
4992
 
4764
4993
  # Look for repeated substrings of length 20-100
4765
4994
  for substr_len in (60, 40, 20):
@@ -4788,8 +5017,70 @@ def _detect_and_truncate_degenerate_repetition(openai_resp: dict) -> dict:
4788
5017
  msg = choices[0].get("message", {})
4789
5018
  msg["content"] = truncated
4790
5019
  choices[0]["finish_reason"] = "stop"
4791
- return openai_resp
4792
- return openai_resp
5020
+ return openai_resp, True
5021
+ return openai_resp, False
5022
+
5023
+
5024
+ def _client_has_tool(anthropic_body: dict, tool_name: str) -> bool:
5025
+ """Check if the client's tool list contains a tool with the given name (case-insensitive)."""
5026
+ lower = tool_name.lower()
5027
+ return any(
5028
+ (t.get("name") or "").lower() == lower for t in anthropic_body.get("tools", [])
5029
+ )
5030
+
5031
+
5032
+ def _client_tool_name(anthropic_body: dict, tool_name: str) -> str:
5033
+ """Return the actual tool name as the client spells it (case-sensitive match)."""
5034
+ lower = tool_name.lower()
5035
+ for t in anthropic_body.get("tools", []):
5036
+ if (t.get("name") or "").lower() == lower:
5037
+ return t["name"]
5038
+ return tool_name
5039
+
5040
+
5041
def _inject_synthetic_continuation(
    anthropic_resp: dict, monitor: SessionMonitor, anthropic_body: dict
) -> dict:
    """Append a no-op tool_use to a finalize-turn response.

    Keeps the client's agentic loop alive: a harmless tool call is added to
    the response content and ``stop_reason`` is set to ``"tool_use"`` so the
    client sends another request. The generated tool id and an incremented
    continuation counter are stored on ``monitor``.

    Returns:
        The same ``anthropic_resp`` dict, modified in place. It is returned
        untouched when the client exposes neither a read nor a bash tool.
    """
    # Candidate no-op calls, in order of preference. Names are matched
    # case-insensitively and then emitted with the client's own casing.
    noop_calls = (
        ("read", {"file_path": "/dev/null"}),
        ("bash", {"command": "true", "description": "continuation ping"}),
    )
    for candidate, candidate_input in noop_calls:
        if _client_has_tool(anthropic_body, candidate):
            tool_name = _client_tool_name(anthropic_body, candidate)
            tool_input = candidate_input
            break
    else:
        logger.warning("FINALIZE CONTINUATION: no suitable tool found, skipping injection")
        return anthropic_resp

    synthetic_id = f"toolu_{uuid.uuid4().hex[:12]}"
    monitor.finalize_synthetic_tool_id = synthetic_id
    monitor.finalize_continuation_count += 1

    synthetic_block = {
        "type": "tool_use",
        "id": synthetic_id,
        "name": tool_name,
        "input": tool_input,
    }
    # setdefault installs a fresh list when the response has no content yet.
    anthropic_resp.setdefault("content", []).append(synthetic_block)
    anthropic_resp["stop_reason"] = "tool_use"

    logger.info(
        "FINALIZE CONTINUATION: injected synthetic %s tool_use id=%s (count=%d/%d)",
        tool_name,
        synthetic_id,
        monitor.finalize_continuation_count,
        PROXY_FINALIZE_CONTINUATION_MAX,
    )
    return anthropic_resp
4793
5084
 
4794
5085
 
4795
5086
  def openai_to_anthropic_response(openai_resp: dict, model: str) -> dict:
@@ -5623,8 +5914,51 @@ async def messages(request: Request):
5623
5914
  session_id,
5624
5915
  )
5625
5916
 
5626
- openai_resp = _detect_and_truncate_degenerate_repetition(openai_resp)
5917
+ openai_resp, was_degenerate = _detect_and_truncate_degenerate_repetition(openai_resp)
5918
+ if was_degenerate:
5919
+ # Retry with constrained parameters to avoid degenerate output.
5920
+ # With tools: force tool_choice=required for a useful tool call.
5921
+ # Without tools (finalize): retry with capped max_tokens for clean text.
5922
+ has_tools = bool(strict_body.get("tools"))
5923
+ retry_body = dict(strict_body)
5924
+ retry_body["max_tokens"] = 2048
5925
+ retry_body["temperature"] = 0.1
5926
+ retry_body["stream"] = False
5927
+ if has_tools:
5928
+ retry_body["tool_choice"] = "required"
5929
+ logger.warning("DEGENERATE RETRY: retrying with tool_choice=required max_tokens=2048")
5930
+ else:
5931
+ logger.warning("DEGENERATE RETRY: retrying text-only with max_tokens=2048 temp=0.1")
5932
+ try:
5933
+ retry_resp = await _post_with_generation_timeout(
5934
+ client, f"{LLAMA_CPP_BASE}/chat/completions", retry_body,
5935
+ {"Content-Type": "application/json"},
5936
+ )
5937
+ if retry_resp.status_code == 200:
5938
+ retry_data = retry_resp.json()
5939
+ retry_text = _openai_message_text(retry_data)
5940
+ _, retry_degenerate = _detect_and_truncate_degenerate_repetition(retry_data)
5941
+ if retry_degenerate:
5942
+ logger.info("DEGENERATE RETRY: retry also degenerate, using truncated original")
5943
+ elif has_tools and (retry_data.get("choices", [{}])[0]
5944
+ .get("message", {}).get("tool_calls")):
5945
+ logger.info("DEGENERATE RETRY: success, got tool call")
5946
+ openai_resp = retry_data
5947
+ elif not has_tools and retry_text and len(retry_text) > 50:
5948
+ logger.info("DEGENERATE RETRY: success, got clean text (%d chars)", len(retry_text))
5949
+ openai_resp = retry_data
5950
+ else:
5951
+ logger.info("DEGENERATE RETRY: retry insufficient, using truncated original")
5952
+ except Exception as exc:
5953
+ logger.warning("DEGENERATE RETRY: failed: %s", exc)
5627
5954
  anthropic_resp = openai_to_anthropic_response(openai_resp, model)
5955
+ # FINALIZE CONTINUATION: inject synthetic tool_use to keep client loop alive
5956
+ if (
5957
+ monitor.finalize_turn_active
5958
+ and monitor.finalize_continuation_count < PROXY_FINALIZE_CONTINUATION_MAX
5959
+ and anthropic_resp.get("stop_reason") == "end_turn"
5960
+ ):
5961
+ anthropic_resp = _inject_synthetic_continuation(anthropic_resp, monitor, body)
5628
5962
  monitor.record_response(anthropic_resp.get("usage", {}).get("output_tokens", 0))
5629
5963
  # Update last_input_tokens from upstream's actual prompt_tokens
5630
5964
  upstream_input = anthropic_resp.get("usage", {}).get("input_tokens", 0)
@@ -5962,8 +6296,38 @@ async def messages(request: Request):
5962
6296
  monitor.invalid_tool_call_streak = 0
5963
6297
  monitor.required_tool_miss_streak = 0
5964
6298
 
5965
- openai_resp = _detect_and_truncate_degenerate_repetition(openai_resp)
6299
+ openai_resp, was_degenerate = _detect_and_truncate_degenerate_repetition(openai_resp)
6300
+ # Degenerate retry for non-guarded stream path
6301
+ if was_degenerate and openai_body.get("tools"):
6302
+ logger.warning("DEGENERATE RETRY (stream): retrying with tool_choice=required max_tokens=2048")
6303
+ retry_body = dict(openai_body)
6304
+ retry_body["tool_choice"] = "required"
6305
+ retry_body["max_tokens"] = 2048
6306
+ retry_body["temperature"] = 0.1
6307
+ retry_body["stream"] = False
6308
+ try:
6309
+ retry_resp = await _post_with_generation_timeout(
6310
+ client, f"{LLAMA_CPP_BASE}/chat/completions", retry_body,
6311
+ {"Content-Type": "application/json"},
6312
+ )
6313
+ if retry_resp.status_code == 200:
6314
+ retry_data = retry_resp.json()
6315
+ if (retry_data.get("choices", [{}])[0]
6316
+ .get("message", {}).get("tool_calls")):
6317
+ logger.info("DEGENERATE RETRY (stream): success, got tool call")
6318
+ openai_resp = retry_data
6319
+ else:
6320
+ logger.info("DEGENERATE RETRY (stream): no tool call, using truncated")
6321
+ except Exception as exc:
6322
+ logger.warning("DEGENERATE RETRY (stream): failed: %s", exc)
5966
6323
  anthropic_resp = openai_to_anthropic_response(openai_resp, model)
6324
+ # FINALIZE CONTINUATION: inject synthetic tool_use (non-guarded stream path)
6325
+ if (
6326
+ monitor.finalize_turn_active
6327
+ and monitor.finalize_continuation_count < PROXY_FINALIZE_CONTINUATION_MAX
6328
+ and anthropic_resp.get("stop_reason") == "end_turn"
6329
+ ):
6330
+ anthropic_resp = _inject_synthetic_continuation(anthropic_resp, monitor, body)
5967
6331
 
5968
6332
  # Track output tokens in session monitor
5969
6333
  output_tokens = anthropic_resp.get("usage", {}).get("output_tokens", 0)
@@ -1892,12 +1892,13 @@ class TestToolTurnControls(unittest.TestCase):
1892
1892
  monitor = proxy.SessionMonitor(context_window=262144)
1893
1893
  monitor.tool_turn_phase = "act"
1894
1894
  monitor.tool_state_forced_budget_remaining = 20
1895
+ # Use hash-format fingerprints to match _tool_call_fingerprint output
1895
1896
  monitor.tool_call_history = [
1896
- "Bash",
1897
+ "Bash:1e7b8d07",
1897
1898
  "TaskOutput",
1898
- "Bash",
1899
+ "Bash:1e7b8d07",
1899
1900
  "TaskOutput",
1900
- "Bash",
1901
+ "Bash:1e7b8d07",
1901
1902
  "TaskOutput",
1902
1903
  ]
1903
1904
  monitor.last_tool_fingerprint = "TaskOutput"
@@ -2076,7 +2077,9 @@ class TestToolTurnControls(unittest.TestCase):
2076
2077
  # Review phase now keeps required to prevent end-turn escape
2077
2078
  self.assertEqual(openai.get("tool_choice"), "required")
2078
2079
  self.assertEqual(monitor.tool_turn_phase, "review")
2079
- self.assertEqual(monitor.tool_state_review_cycles, 1)
2080
+ # review_cycles only increments when cycle_looping or stagnating,
2081
+ # not on mere budget exhaustion (model was working, not cycling)
2082
+ self.assertEqual(monitor.tool_state_review_cycles, 0)
2080
2083
  finally:
2081
2084
  setattr(proxy, "PROXY_TOOL_STATE_MACHINE", old_state)
2082
2085
  setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", old_min_msgs)
@@ -2242,7 +2245,11 @@ class TestToolTurnControls(unittest.TestCase):
2242
2245
  monitor = proxy.SessionMonitor(context_window=262144)
2243
2246
  monitor.tool_turn_phase = "act"
2244
2247
  monitor.tool_state_stagnation_streak = 4
2245
- monitor.tool_call_history = ["Bash", "TaskOutput", "Bash", "TaskOutput"]
2248
+ # Use hash-format fingerprints to match _tool_call_fingerprint output
2249
+ monitor.tool_call_history = [
2250
+ "Bash:1e7b8d07", "TaskOutput", "Bash:1e7b8d07", "TaskOutput",
2251
+ "Bash:1e7b8d07", "TaskOutput",
2252
+ ]
2246
2253
  monitor.last_tool_fingerprint = "TaskOutput"
2247
2254
 
2248
2255
  body = {
@@ -3262,8 +3269,11 @@ class TestCycleBreakOptions(unittest.TestCase):
3262
3269
  monitor = proxy.SessionMonitor(context_window=262144)
3263
3270
  monitor.tool_turn_phase = "act"
3264
3271
  monitor.tool_state_forced_budget_remaining = 20
3265
- monitor.tool_call_history = ["Bash", "Bash", "Bash", "Bash"]
3266
- monitor.last_tool_fingerprint = "Bash"
3272
+ # Hash-format fingerprints matching Bash+{"command":"ls"}
3273
+ monitor.tool_call_history = [
3274
+ "Bash:781c24ad", "Bash:781c24ad", "Bash:781c24ad", "Bash:781c24ad",
3275
+ ]
3276
+ monitor.last_tool_fingerprint = "Bash:781c24ad"
3267
3277
 
3268
3278
  body = {
3269
3279
  "model": "test",
@@ -3323,8 +3333,11 @@ class TestCycleBreakOptions(unittest.TestCase):
3323
3333
  monitor = proxy.SessionMonitor(context_window=262144)
3324
3334
  monitor.tool_turn_phase = "act"
3325
3335
  monitor.tool_state_forced_budget_remaining = 20
3326
- monitor.tool_call_history = ["Bash", "Bash", "Bash", "Bash"]
3327
- monitor.last_tool_fingerprint = "Bash"
3336
+ # Hash-format fingerprints matching Bash+{"command":"ls"}
3337
+ monitor.tool_call_history = [
3338
+ "Bash:781c24ad", "Bash:781c24ad", "Bash:781c24ad", "Bash:781c24ad",
3339
+ ]
3340
+ monitor.last_tool_fingerprint = "Bash:781c24ad"
3328
3341
 
3329
3342
  body = {
3330
3343
  "model": "test",
@@ -3369,9 +3382,9 @@ class TestCycleBreakOptions(unittest.TestCase):
3369
3382
  """Option 3: default forced budget reduced from 24 to 12."""
3370
3383
  self.assertEqual(proxy.PROXY_TOOL_STATE_FORCED_BUDGET, 12)
3371
3384
 
3372
- def test_review_cycle_limit_default_is_1(self):
3373
- """Option 4: default review cycle limit reduced from 2 to 1."""
3374
- self.assertEqual(proxy.PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT, 1)
3385
def test_review_cycle_limit_default_is_3(self):
    """Option 4: the review cycle limit constant on ``proxy`` equals 3."""
    expected_limit = 3
    actual_limit = proxy.PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT
    self.assertEqual(actual_limit, expected_limit)
3375
3388
 
3376
3389
  def test_cycling_tool_names_cleared_on_reset(self):
3377
3390
  """cycling_tool_names is cleared when tool turn state resets."""
@@ -3450,8 +3463,9 @@ class TestDegenerateRepetitionDetection(unittest.TestCase):
3450
3463
  openai_resp = {
3451
3464
  "choices": [{"message": {"content": repeated}, "finish_reason": "length"}]
3452
3465
  }
3453
- result = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3466
+ result, truncated = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3454
3467
  truncated_text = result["choices"][0]["message"]["content"]
3468
+ self.assertTrue(truncated)
3455
3469
  self.assertLess(len(truncated_text), len(repeated))
3456
3470
  self.assertEqual(result["choices"][0]["finish_reason"], "stop")
3457
3471
 
@@ -3461,7 +3475,8 @@ class TestDegenerateRepetitionDetection(unittest.TestCase):
3461
3475
  openai_resp = {
3462
3476
  "choices": [{"message": {"content": text}, "finish_reason": "stop"}]
3463
3477
  }
3464
- result = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3478
+ result, truncated = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3479
+ self.assertFalse(truncated)
3465
3480
  self.assertEqual(result["choices"][0]["message"]["content"], text)
3466
3481
 
3467
3482
  def test_preserves_short_text(self):
@@ -3470,7 +3485,8 @@ class TestDegenerateRepetitionDetection(unittest.TestCase):
3470
3485
  openai_resp = {
3471
3486
  "choices": [{"message": {"content": text}, "finish_reason": "stop"}]
3472
3487
  }
3473
- result = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3488
+ result, truncated = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3489
+ self.assertFalse(truncated)
3474
3490
  self.assertEqual(result["choices"][0]["message"]["content"], text)
3475
3491
 
3476
3492
  def test_max_tokens_floor_skipped_for_non_tool_requests(self):
@@ -4220,3 +4236,342 @@ class TestReviewPhaseBootstrapReset(unittest.TestCase):
4220
4236
  # The bootstrap reset only triggers for review phase
4221
4237
  self.assertNotEqual(m.tool_turn_phase, "review")
4222
4238
  # In act phase, the normal guardrail fallback path runs instead
4239
+
4240
+
4241
class TestReadOnlyCycleClassExclusion(unittest.TestCase):
    """Tests for Option 1: read-only tool class exclusion on cycle break,
    Option 2: reduced cycle window (3), and Option 3: duplicate target dedup."""

    # Module-level proxy settings pinned by the build_openai_request tests so
    # they do not depend on environment-provided defaults.
    _CYCLE_SETTINGS = {
        "PROXY_TOOL_STATE_MACHINE": True,
        "PROXY_TOOL_STATE_MIN_MESSAGES": 3,
        "PROXY_TOOL_STATE_FORCED_BUDGET": 3,
        "PROXY_TOOL_STATE_CYCLE_WINDOW": 3,
        "PROXY_TOOL_STATE_STAGNATION_THRESHOLD": 2,
    }

    def _override_proxy_settings(self, **overrides):
        """Set attributes on ``proxy`` for the current test only.

        Each previous value is captured before being overwritten and is
        restored through ``addCleanup``, so the restore runs whether the test
        passes, fails, or errors.
        """
        for name, value in overrides.items():
            self.addCleanup(setattr, proxy, name, getattr(proxy, name))
            setattr(proxy, name, value)

    def _make_body_with_tools(self, tool_names):
        """Build a minimal Anthropic body with named tools and a tool_result.

        The single assistant ``tool_use`` block calls ``tool_names[0]``, so
        ``tool_names`` must be non-empty.
        """
        tools = [
            {"name": n, "description": f"{n} tool", "input_schema": {"type": "object"}}
            for n in tool_names
        ]
        return {
            "model": "test",
            "messages": [
                {"role": "user", "content": "do something"},
                {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "tool_use",
                            "id": "toolu_1",
                            "name": tool_names[0],
                            "input": {"file_path": "/some/file.ts"},
                        }
                    ],
                },
                {
                    "role": "user",
                    "content": [
                        {"type": "tool_result", "tool_use_id": "toolu_1", "content": "ok"}
                    ],
                },
            ],
            "tools": tools,
        }

    def test_read_only_class_exclusion_expands(self):
        """When 'read' is cycling, all read-only tools are excluded, not just 'read'."""
        self._override_proxy_settings(**self._CYCLE_SETTINGS)

        all_tools = ["read", "glob", "grep", "bash", "write", "edit"]
        body = self._make_body_with_tools(all_tools)
        monitor = proxy.SessionMonitor(context_window=262144)

        # Simulate cycling on 'read' by recording 3 identical fingerprints
        # Hash-format matching read+{"file_path":"/some/file.ts"}
        fp = "read:cfb28722"
        for _ in range(3):
            monitor.record_tool_calls(["read"], fingerprint=fp)

        openai_body = proxy.build_openai_request(body, monitor)

        # After cycle break, the tools in the body should exclude ALL
        # read-only tools, not just 'read'
        remaining_names = [
            t.get("function", {}).get("name") for t in openai_body.get("tools", [])
        ]
        self.assertNotIn("read", remaining_names)
        self.assertNotIn("glob", remaining_names)
        self.assertNotIn("grep", remaining_names)
        # Write/action tools should remain
        self.assertIn("bash", remaining_names)
        self.assertIn("write", remaining_names)
        self.assertIn("edit", remaining_names)

    def test_non_read_tool_cycling_no_class_expansion(self):
        """When 'bash' is cycling, only 'bash' is excluded, not read-only tools."""
        self._override_proxy_settings(**self._CYCLE_SETTINGS)

        all_tools = ["read", "glob", "grep", "bash", "write", "edit"]
        body = self._make_body_with_tools(all_tools)
        # Change the assistant tool_use to bash
        body["messages"][1]["content"][0]["name"] = "bash"
        body["messages"][1]["content"][0]["input"] = {"command": "ls"}
        monitor = proxy.SessionMonitor(context_window=262144)

        # Use hash-format fingerprints matching bash+{"command":"ls"}
        fp = "bash:781c24ad"
        for _ in range(3):
            monitor.record_tool_calls(["bash"], fingerprint=fp)

        openai_body = proxy.build_openai_request(body, monitor)

        remaining_names = [
            t.get("function", {}).get("name") for t in openai_body.get("tools", [])
        ]
        self.assertNotIn("bash", remaining_names)
        # Read-only tools should still be available
        self.assertIn("read", remaining_names)
        self.assertIn("glob", remaining_names)
        self.assertIn("grep", remaining_names)

    def test_duplicate_read_target_triggers_early_cycle(self):
        """Option 3: reading same file 3+ times triggers early cycle break."""
        monitor = proxy.SessionMonitor(context_window=262144)

        # Record 3 reads of same target
        for _ in range(3):
            monitor.record_tool_calls(
                ["read"], tool_targets={"read": "/path/to/file.ts"}
            )

        dup, tool = monitor.has_duplicate_read_target(threshold=3)
        self.assertTrue(dup)
        self.assertEqual(tool, "read")

    def test_different_read_targets_no_duplicate(self):
        """Option 3: reading different files does NOT trigger duplicate detection."""
        monitor = proxy.SessionMonitor(context_window=262144)

        for target in ("/path/a.ts", "/path/b.ts", "/path/c.ts"):
            monitor.record_tool_calls(["read"], tool_targets={"read": target})

        dup, _ = monitor.has_duplicate_read_target(threshold=3)
        self.assertFalse(dup)

    def test_cycle_window_default_is_3(self):
        """Option 2: verify default cycle window is now 3."""
        # Assert on the module constant itself. Re-reading the environment
        # variable with "3" as the fallback would only compare the test's own
        # literal against itself and never look at the proxy module.
        self.assertEqual(proxy.PROXY_TOOL_STATE_CYCLE_WINDOW, 3)

    def test_target_history_reset_on_state_reset(self):
        """Target history is cleared when tool state resets."""
        monitor = proxy.SessionMonitor(context_window=262144)
        for _ in range(3):
            monitor.record_tool_calls(["read"], tool_targets={"read": "/file.ts"})

        dup, _ = monitor.has_duplicate_read_target(threshold=3)
        self.assertTrue(dup)

        monitor.reset_tool_turn_state(reason="test_reset")

        dup, _ = monitor.has_duplicate_read_target(threshold=3)
        self.assertFalse(dup)
4409
+
4410
+
4411
class TestPersistentCycleExclusion(unittest.TestCase):
    """Tests for Cycle 14: persistent exclusion, escalating hints, and
    exclusion across review→act transitions."""

    def _override_proxy_settings(self, **overrides):
        """Set attributes on ``proxy`` for the current test only.

        Each previous value is captured before being overwritten and is
        restored through ``addCleanup``, so the restore runs whether the test
        passes, fails, or errors.
        """
        for name, value in overrides.items():
            self.addCleanup(setattr, proxy, name, getattr(proxy, name))
            setattr(proxy, name, value)

    def _make_body_with_tools(self, tool_names, active_tool="bash", active_input=None):
        """Build a minimal Anthropic body whose last assistant turn calls ``active_tool``.

        ``active_input`` is used as the tool_use input; only ``None`` selects
        the default ``{"command": "ls"}``, so an explicit empty dict is kept.
        """
        tools = [
            {"name": n, "description": f"{n} tool", "input_schema": {"type": "object"}}
            for n in tool_names
        ]
        # Identity check rather than truthiness: `{}` is a legitimate input.
        inp = {"command": "ls"} if active_input is None else active_input
        return {
            "model": "test",
            "messages": [
                {"role": "user", "content": "do something"},
                {
                    "role": "assistant",
                    "content": [
                        {"type": "tool_use", "id": "t1", "name": active_tool, "input": inp}
                    ],
                },
                {
                    "role": "user",
                    "content": [
                        {"type": "tool_result", "tool_use_id": "t1", "content": "ok"}
                    ],
                },
            ],
            "tools": tools,
        }

    def test_exclusion_persists_through_act_phase(self):
        """Option 1: cycling_tool_names exclusion persists in act phase after review."""
        self._override_proxy_settings(
            PROXY_TOOL_STATE_MACHINE=True,
            PROXY_TOOL_STATE_MIN_MESSAGES=3,
            PROXY_TOOL_STATE_FORCED_BUDGET=6,
            PROXY_TOOL_STATE_CYCLE_WINDOW=3,
            PROXY_TOOL_STATE_STAGNATION_THRESHOLD=2,
        )

        all_tools = ["bash", "read", "write", "edit"]
        body = self._make_body_with_tools(all_tools)
        monitor = proxy.SessionMonitor(context_window=262144)

        # Simulate bash cycling that triggers review
        monitor.cycling_tool_names = ["bash"]
        monitor.tool_turn_phase = "act"
        monitor.tool_state_forced_budget_remaining = 5

        openai_body = proxy.build_openai_request(body, monitor)

        # In act phase with cycling_tool_names set, bash should be excluded
        remaining = [t["function"]["name"] for t in openai_body.get("tools", [])]
        self.assertNotIn("bash", remaining)
        self.assertIn("read", remaining)
        self.assertIn("write", remaining)

    def test_exclusion_cleared_on_stagnation_clear(self):
        """Option 1: cycling exclusion is lifted when stagnation clears in review."""
        monitor = proxy.SessionMonitor(context_window=262144)
        monitor.tool_turn_phase = "review"
        monitor.tool_state_review_cycles = 1
        monitor.tool_state_stagnation_streak = 0  # stagnation cleared
        monitor.cycling_tool_names = ["bash"]
        monitor.tool_state_auto_budget_remaining = 0
        monitor.tool_state_forced_budget_remaining = 6

        # This should transition review→act and clear cycling names
        self._override_proxy_settings(
            PROXY_TOOL_STATE_MACHINE=True,
            PROXY_TOOL_STATE_MIN_MESSAGES=3,
            PROXY_TOOL_STATE_FORCED_BUDGET=6,
        )

        body = self._make_body_with_tools(["bash", "read", "write"])
        proxy.build_openai_request(body, monitor)

        self.assertEqual(monitor.tool_turn_phase, "act")
        self.assertEqual(monitor.cycling_tool_names, [])

    def test_escalated_hint_on_cycle_2(self):
        """Option 3: cycle 2+ gets escalated CRITICAL hint text."""
        self._override_proxy_settings(
            PROXY_TOOL_STATE_MACHINE=True,
            PROXY_TOOL_STATE_MIN_MESSAGES=3,
            PROXY_TOOL_STATE_FORCED_BUDGET=20,
            PROXY_TOOL_STATE_CYCLE_WINDOW=3,
            PROXY_TOOL_STATE_STAGNATION_THRESHOLD=2,
        )

        all_tools = ["bash", "read", "write"]
        body = self._make_body_with_tools(all_tools)
        monitor = proxy.SessionMonitor(context_window=262144)
        # Pre-set as if we've already been through 1 review cycle
        monitor.tool_turn_phase = "act"
        monitor.tool_state_review_cycles = 1
        monitor.tool_state_forced_budget_remaining = 20
        monitor.tool_state_stagnation_streak = 3
        fp = "bash:781c24ad"
        monitor.tool_call_history = [fp, fp, fp]
        monitor.last_tool_fingerprint = fp

        openai_body = proxy.build_openai_request(body, monitor)

        # Should now be in review with cycles=2 and escalated hint
        self.assertEqual(monitor.tool_turn_phase, "review")
        self.assertEqual(monitor.tool_state_review_cycles, 2)
        messages = openai_body.get("messages", [])
        last_user = [m for m in messages if m.get("role") == "user"][-1]
        self.assertIn("CRITICAL", last_user["content"])
        self.assertIn("2 review rounds", last_user["content"])

    def test_mild_hint_on_cycle_1(self):
        """Option 3: cycle 1 gets mild hint, not escalated."""
        self._override_proxy_settings(
            PROXY_TOOL_STATE_MACHINE=True,
            PROXY_TOOL_STATE_MIN_MESSAGES=3,
            PROXY_TOOL_STATE_FORCED_BUDGET=20,
            PROXY_TOOL_STATE_CYCLE_WINDOW=3,
            PROXY_TOOL_STATE_STAGNATION_THRESHOLD=2,
        )

        body = self._make_body_with_tools(["bash", "read", "write"])
        monitor = proxy.SessionMonitor(context_window=262144)
        monitor.tool_turn_phase = "act"
        monitor.tool_state_review_cycles = 0
        monitor.tool_state_forced_budget_remaining = 20
        monitor.tool_state_stagnation_streak = 3
        fp = "bash:781c24ad"
        monitor.tool_call_history = [fp, fp, fp]
        monitor.last_tool_fingerprint = fp

        openai_body = proxy.build_openai_request(body, monitor)

        self.assertEqual(monitor.tool_turn_phase, "review")
        self.assertEqual(monitor.tool_state_review_cycles, 1)
        messages = openai_body.get("messages", [])
        last_user = [m for m in messages if m.get("role") == "user"][-1]
        self.assertNotIn("CRITICAL", last_user["content"])
        self.assertIn("DIFFERENT tool", last_user["content"])