@miller-tech/uap 1.20.23 → 1.20.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.20.23",
3
+ "version": "1.20.25",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -155,16 +155,16 @@ PROXY_TOOL_STATE_FORCED_BUDGET = int(
155
155
  )
156
156
  PROXY_TOOL_STATE_AUTO_BUDGET = int(os.environ.get("PROXY_TOOL_STATE_AUTO_BUDGET", "2"))
157
157
  PROXY_TOOL_STATE_STAGNATION_THRESHOLD = int(
158
- os.environ.get("PROXY_TOOL_STATE_STAGNATION_THRESHOLD", "5")
158
+ os.environ.get("PROXY_TOOL_STATE_STAGNATION_THRESHOLD", "8")
159
159
  )
160
160
  PROXY_TOOL_STATE_CYCLE_WINDOW = int(
161
- os.environ.get("PROXY_TOOL_STATE_CYCLE_WINDOW", "4")
161
+ os.environ.get("PROXY_TOOL_STATE_CYCLE_WINDOW", "3")
162
162
  )
163
163
  PROXY_TOOL_STATE_FINALIZE_THRESHOLD = int(
164
164
  os.environ.get("PROXY_TOOL_STATE_FINALIZE_THRESHOLD", "18")
165
165
  )
166
166
  PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT = int(
167
- os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "1")
167
+ os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "3")
168
168
  )
169
169
  PROXY_COMPLETION_RECOVERY_MAX = int(
170
170
  os.environ.get("PROXY_COMPLETION_RECOVERY_MAX", "3")
@@ -189,6 +189,12 @@ PROXY_TOOL_NARROWING_EXPAND_ON_LOOP = os.environ.get(
189
189
  "off",
190
190
  "no",
191
191
  }
192
+ # Read-only tools that should be excluded as a class when any one cycles
193
+ _READ_ONLY_TOOL_CLASS = frozenset({
194
+ "read", "glob", "grep", "Read", "Glob", "Grep",
195
+ "search", "Search", "list_files", "ListFiles",
196
+ })
197
+
192
198
  PROXY_GUARDRAIL_RETRY = os.environ.get("PROXY_GUARDRAIL_RETRY", "on").lower() not in {
193
199
  "0",
194
200
  "false",
@@ -196,6 +202,9 @@ PROXY_GUARDRAIL_RETRY = os.environ.get("PROXY_GUARDRAIL_RETRY", "on").lower() no
196
202
  "no",
197
203
  }
198
204
  PROXY_SESSION_TTL_SECS = int(os.environ.get("PROXY_SESSION_TTL_SECS", "7200"))
205
+ PROXY_FINALIZE_CONTINUATION_MAX = int(
206
+ os.environ.get("PROXY_FINALIZE_CONTINUATION_MAX", "3")
207
+ )
199
208
  PROXY_STREAM_REASONING_FALLBACK = (
200
209
  os.environ.get("PROXY_STREAM_REASONING_FALLBACK", "off").strip().lower()
201
210
  )
@@ -621,6 +630,9 @@ class SessionMonitor:
621
630
  tool_call_history: list = field(
622
631
  default_factory=list
623
632
  ) # Recent tool call fingerprints
633
+ tool_target_history: dict = field(
634
+ default_factory=dict
635
+ ) # {tool_name: {target: count}} for read-only dedup
624
636
  consecutive_forced_count: int = (
625
637
  0 # How many times tool_choice was forced consecutively
626
638
  )
@@ -646,6 +658,8 @@ class SessionMonitor:
646
658
  cycling_tool_names: list = field(default_factory=list)
647
659
  last_response_garbled: bool = False # previous turn had garbled/malformed output
648
660
  finalize_turn_active: bool = False
661
+ finalize_continuation_count: int = 0
662
+ finalize_synthetic_tool_id: str = ""
649
663
  completion_required: bool = False
650
664
  completion_pending: bool = False
651
665
  completion_verified: bool = False
@@ -753,14 +767,47 @@ class SessionMonitor:
753
767
 
754
768
  # --- Token Loop Protection Methods ---
755
769
 
756
- def record_tool_calls(self, tool_names: list[str]):
757
- """Record tool call names for loop detection."""
758
- fingerprint = "|".join(sorted(tool_names)) if tool_names else ""
759
- self.tool_call_history.append(fingerprint)
770
+ def record_tool_calls(
771
+ self,
772
+ tool_names: list[str],
773
+ tool_targets: dict[str, str] | None = None,
774
+ fingerprint: str = "",
775
+ ):
776
+ """Record tool call names for loop detection.
777
+
778
+ tool_targets: optional {tool_name: target_key} for read-only dedup.
779
+ e.g. {"read": "/path/to/file", "glob": "**/*.ts"}
780
+ If a pre-computed fingerprint (with argument hashes) is provided,
781
+ use it directly. Otherwise fall back to name-only fingerprint.
782
+ """
783
+ fp = fingerprint or ("|".join(sorted(tool_names)) if tool_names else "")
784
+ self.tool_call_history.append(fp)
760
785
  # Keep last 30 entries
761
786
  if len(self.tool_call_history) > 30:
762
787
  self.tool_call_history = self.tool_call_history[-30:]
763
788
 
789
+ # Track read-only tool targets for dedup (Option 3)
790
+ if tool_targets:
791
+ for name, target in tool_targets.items():
792
+ if name.lower() in {n.lower() for n in _READ_ONLY_TOOL_CLASS} and target:
793
+ by_tool = self.tool_target_history.setdefault(name, {})
794
+ by_tool[target] = by_tool.get(target, 0) + 1
795
+
796
+ def has_duplicate_read_target(self, threshold: int = 2) -> tuple[bool, str]:
797
+ """Check if any read-only tool has re-read the same target >= threshold times.
798
+
799
+ Returns (is_duplicate, tool_name) for the first offending tool.
800
+ """
801
+ for tool_name, targets in self.tool_target_history.items():
802
+ for target, count in targets.items():
803
+ if count >= threshold:
804
+ return True, tool_name
805
+ return False, ""
806
+
807
+ def reset_tool_targets(self):
808
+ """Clear target history (on phase reset or fresh user text)."""
809
+ self.tool_target_history = {}
810
+
764
811
  def detect_tool_loop(self, window: int = 6) -> tuple[bool, int]:
765
812
  """Detect if the model is stuck in a tool call loop.
766
813
 
@@ -851,6 +898,7 @@ class SessionMonitor:
851
898
  self.tool_state_review_cycles = 0
852
899
  self.cycling_tool_names = []
853
900
  self.last_tool_fingerprint = ""
901
+ self.reset_tool_targets()
854
902
 
855
903
  def update_completion_state(self, anthropic_body: dict, has_tool_results: bool):
856
904
  self.completion_required = _should_enforce_completion_contract(anthropic_body)
@@ -2095,6 +2143,8 @@ def _resolve_state_machine_tool_choice(
2095
2143
  monitor.invalid_tool_call_streak = 0
2096
2144
  monitor.required_tool_miss_streak = 0
2097
2145
  monitor.reset_tool_turn_state(reason="fresh_user_text")
2146
+ monitor.finalize_continuation_count = 0
2147
+ monitor.finalize_synthetic_tool_id = ""
2098
2148
  return None, "fresh_user_text"
2099
2149
 
2100
2150
  active_loop = (
@@ -2113,6 +2163,8 @@ def _resolve_state_machine_tool_choice(
2113
2163
  monitor.invalid_tool_call_streak = 0
2114
2164
  monitor.required_tool_miss_streak = 0
2115
2165
  monitor.reset_tool_turn_state(reason="inactive_loop")
2166
+ monitor.finalize_continuation_count = 0
2167
+ monitor.finalize_synthetic_tool_id = ""
2116
2168
  return None, "inactive_loop"
2117
2169
 
2118
2170
  if monitor.tool_turn_phase == "bootstrap":
@@ -2158,6 +2210,16 @@ def _resolve_state_machine_tool_choice(
2158
2210
  return "finalize", "review_cycle_limit"
2159
2211
 
2160
2212
  if monitor.tool_turn_phase == "act":
2213
+ # Option 3: Early cycle break when same read target is hit 3+ times
2214
+ dup_target, dup_tool = monitor.has_duplicate_read_target(threshold=3)
2215
+ if dup_target and not cycle_looping and not stagnating:
2216
+ cycle_looping = True
2217
+ cycle_repeat = 2
2218
+ logger.warning(
2219
+ "TOOL STATE MACHINE: duplicate read target detected for '%s', triggering early cycle break",
2220
+ dup_tool,
2221
+ )
2222
+
2161
2223
  if cycle_looping or stagnating:
2162
2224
  reason = "cycle_detected" if cycle_looping else "stagnation"
2163
2225
  monitor.set_tool_turn_phase("review", reason=reason)
@@ -2169,9 +2231,15 @@ def _resolve_state_machine_tool_choice(
2169
2231
  1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
2170
2232
  )
2171
2233
  # Capture which tools are cycling for narrowing/hint injection
2234
+ # Strip argument hashes (e.g. "glob:abc12345" -> "glob") so that
2235
+ # tool narrowing can match against actual tool names.
2172
2236
  window = max(2, PROXY_TOOL_STATE_CYCLE_WINDOW)
2173
2237
  recent = [fp for fp in monitor.tool_call_history[-window:] if fp]
2174
- monitor.cycling_tool_names = list(dict.fromkeys(recent))
2238
+ raw_names = []
2239
+ for fp in recent:
2240
+ for part in fp.split("|"):
2241
+ raw_names.append(part.split(":")[0])
2242
+ monitor.cycling_tool_names = list(dict.fromkeys(raw_names))
2175
2243
  logger.warning(
2176
2244
  "TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d cycling_tools=%s)",
2177
2245
  cycle_looping,
@@ -2184,7 +2252,11 @@ def _resolve_state_machine_tool_choice(
2184
2252
 
2185
2253
  if monitor.tool_state_forced_budget_remaining <= 0:
2186
2254
  monitor.set_tool_turn_phase("review", reason="forced_budget_exhausted")
2187
- monitor.tool_state_review_cycles += 1
2255
+ # Only count toward review cycle limit if there was an actual
2256
+ # cycle/stagnation detected. Budget exhaustion alone means the
2257
+ # model is working — it just used all its turns — not cycling.
2258
+ if cycle_looping or stagnating:
2259
+ monitor.tool_state_review_cycles += 1
2188
2260
  monitor.tool_state_auto_budget_remaining = max(
2189
2261
  1, PROXY_TOOL_STATE_AUTO_BUDGET
2190
2262
  )
@@ -2192,8 +2264,10 @@ def _resolve_state_machine_tool_choice(
2192
2264
  1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
2193
2265
  )
2194
2266
  logger.warning(
2195
- "TOOL STATE MACHINE: forced budget exhausted, entering review (cycles=%d)",
2267
+ "TOOL STATE MACHINE: forced budget exhausted, entering review (cycles=%d cycling=%s stagnating=%s)",
2196
2268
  monitor.tool_state_review_cycles,
2269
+ cycle_looping,
2270
+ stagnating,
2197
2271
  )
2198
2272
  return "required", "forced_budget_exhausted"
2199
2273
 
@@ -2206,6 +2280,14 @@ def _resolve_state_machine_tool_choice(
2206
2280
  monitor.tool_state_forced_budget_remaining = max(
2207
2281
  1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
2208
2282
  )
2283
+ # If stagnation cleared during review, the model tried a
2284
+ # different approach — reward by reducing cycle pressure.
2285
+ if monitor.tool_state_stagnation_streak == 0 and monitor.tool_state_review_cycles > 0:
2286
+ monitor.tool_state_review_cycles = max(0, monitor.tool_state_review_cycles - 1)
2287
+ logger.info(
2288
+ "TOOL STATE MACHINE: review_cycles decremented to %d (stagnation cleared)",
2289
+ monitor.tool_state_review_cycles,
2290
+ )
2209
2291
  return "required", "review_complete"
2210
2292
 
2211
2293
  monitor.tool_state_auto_budget_remaining -= 1
@@ -2416,6 +2498,9 @@ def build_openai_request(
2416
2498
  n_msgs = len(anthropic_body.get("messages", []))
2417
2499
  has_tool_results = _conversation_has_tool_results(anthropic_body)
2418
2500
 
2501
+ # Detect and strip synthetic finalize continuation before fingerprinting
2502
+ _detect_and_strip_synthetic_continuation(anthropic_body, monitor)
2503
+
2419
2504
  # Record tool calls from the last assistant message for loop detection
2420
2505
  latest_tool_fingerprint = _record_last_assistant_tool_calls(
2421
2506
  anthropic_body, monitor
@@ -2524,24 +2609,31 @@ def build_openai_request(
2524
2609
  cycling_names,
2525
2610
  )
2526
2611
  # Option 2: Narrow tools during review to exclude cycling tools
2612
+ # Option 1 enhancement: if any cycling tool is read-only, exclude
2613
+ # the entire read-only class to prevent tool-hopping (read→glob→grep)
2527
2614
  if (
2528
2615
  monitor.tool_turn_phase == "review"
2529
2616
  and monitor.cycling_tool_names
2530
2617
  and "tools" in openai_body
2531
2618
  ):
2619
+ exclude_set = set(monitor.cycling_tool_names)
2620
+ # Expand to full read-only class if any cycling tool is read-only
2621
+ if any(n.lower() in {c.lower() for c in _READ_ONLY_TOOL_CLASS} for n in exclude_set):
2622
+ exclude_set |= _READ_ONLY_TOOL_CLASS
2532
2623
  original_count = len(openai_body["tools"])
2533
2624
  narrowed = [
2534
2625
  t
2535
2626
  for t in openai_body["tools"]
2536
- if t.get("function", {}).get("name") not in monitor.cycling_tool_names
2627
+ if t.get("function", {}).get("name") not in exclude_set
2537
2628
  ]
2538
2629
  if narrowed:
2539
2630
  openai_body["tools"] = narrowed
2540
2631
  logger.warning(
2541
- "CYCLE BREAK: narrowed tools from %d to %d (excluded %s)",
2632
+ "CYCLE BREAK: narrowed tools from %d to %d (excluded %s, read_only_class=%s)",
2542
2633
  original_count,
2543
2634
  len(narrowed),
2544
2635
  monitor.cycling_tool_names,
2636
+ any(n.lower() in {c.lower() for c in _READ_ONLY_TOOL_CLASS} for n in monitor.cycling_tool_names),
2545
2637
  )
2546
2638
  else:
2547
2639
  logger.warning(
@@ -2602,13 +2694,117 @@ def build_openai_request(
2602
2694
  return openai_body
2603
2695
 
2604
2696
 
2697
+ def _tool_call_fingerprint(block: dict) -> str:
2698
+ """Create a fingerprint for a tool call that includes both name and a
2699
+ short hash of the arguments. This prevents false cycle detection when
2700
+ the same tool is called with different arguments (e.g. reading different
2701
+ files)."""
2702
+ name = block.get("name", "unknown")
2703
+ inp = block.get("input")
2704
+ if inp:
2705
+ arg_str = json.dumps(inp, sort_keys=True, separators=(",", ":"))
2706
+ arg_hash = hashlib.md5(arg_str.encode()).hexdigest()[:8]
2707
+ return f"{name}:{arg_hash}"
2708
+ return name
2709
+
2710
+
2711
+ def _detect_and_strip_synthetic_continuation(
2712
+ anthropic_body: dict, monitor: SessionMonitor
2713
+ ) -> bool:
2714
+ """Detect if the latest messages contain a synthetic finalize continuation
2715
+ tool_use/tool_result pair. If found, strip them from the conversation and
2716
+ reset the state machine so the model gets a fresh act cycle.
2717
+
2718
+ Returns True if a synthetic continuation was detected and handled.
2719
+ """
2720
+ synthetic_id = monitor.finalize_synthetic_tool_id
2721
+ if not synthetic_id:
2722
+ return False
2723
+
2724
+ messages = anthropic_body.get("messages", [])
2725
+ if not messages:
2726
+ return False
2727
+
2728
+ # Walk backwards to find the synthetic tool_result in a user message
2729
+ found = False
2730
+ for msg in reversed(messages):
2731
+ if msg.get("role") != "user":
2732
+ continue
2733
+ content = msg.get("content")
2734
+ if not isinstance(content, list):
2735
+ break
2736
+ has_synthetic = any(
2737
+ isinstance(b, dict)
2738
+ and b.get("type") == "tool_result"
2739
+ and b.get("tool_use_id") == synthetic_id
2740
+ for b in content
2741
+ )
2742
+ if not has_synthetic:
2743
+ break
2744
+
2745
+ # Strip synthetic tool_result from user message
2746
+ new_content = [
2747
+ b for b in content
2748
+ if not (
2749
+ isinstance(b, dict)
2750
+ and b.get("type") == "tool_result"
2751
+ and b.get("tool_use_id") == synthetic_id
2752
+ )
2753
+ ]
2754
+ if not new_content:
2755
+ msg["content"] = [{"type": "text", "text": "Continue working on the task."}]
2756
+ else:
2757
+ msg["content"] = new_content
2758
+
2759
+ # Strip synthetic tool_use from the preceding assistant message
2760
+ for asst_msg in reversed(messages):
2761
+ if asst_msg.get("role") != "assistant":
2762
+ continue
2763
+ asst_content = asst_msg.get("content")
2764
+ if isinstance(asst_content, list):
2765
+ asst_msg["content"] = [
2766
+ b for b in asst_content
2767
+ if not (
2768
+ isinstance(b, dict)
2769
+ and b.get("type") == "tool_use"
2770
+ and b.get("id") == synthetic_id
2771
+ )
2772
+ ]
2773
+ break
2774
+
2775
+ found = True
2776
+ break
2777
+
2778
+ if not found:
2779
+ return False
2780
+
2781
+ # Reset state machine for fresh act cycle
2782
+ monitor.finalize_synthetic_tool_id = ""
2783
+ monitor.reset_tool_turn_state(reason="finalize_continuation_resume")
2784
+ monitor.reset_completion_recovery()
2785
+ monitor.tool_call_history = []
2786
+ logger.info(
2787
+ "FINALIZE CONTINUATION: stripped synthetic tool id=%s, "
2788
+ "reset state machine for fresh act cycle (continuations=%d/%d)",
2789
+ synthetic_id,
2790
+ monitor.finalize_continuation_count,
2791
+ PROXY_FINALIZE_CONTINUATION_MAX,
2792
+ )
2793
+ return True
2794
+
2795
+
2605
2796
  def _record_last_assistant_tool_calls(
2606
2797
  anthropic_body: dict, monitor: SessionMonitor
2607
2798
  ) -> str:
2608
2799
  """Extract tool call names from the last assistant message and record
2609
- them in the session monitor for loop detection."""
2800
+ them in the session monitor for loop detection.
2801
+
2802
+ Fingerprints now include an argument hash so that the same tool called
2803
+ with different arguments (e.g. read(file_a) vs read(file_b)) produces
2804
+ distinct fingerprints, preventing false cycle/stagnation detection."""
2610
2805
  messages = anthropic_body.get("messages", [])
2611
- tool_names = []
2806
+ tool_fingerprints = []
2807
+ tool_targets: dict[str, str] = {}
2612
2808
  for msg in reversed(messages):
2613
2809
  if msg.get("role") != "assistant":
2614
2810
  continue
@@ -2616,11 +2812,28 @@ def _record_last_assistant_tool_calls(
2616
2812
  if isinstance(content, list):
2617
2813
  for block in content:
2618
2814
  if isinstance(block, dict) and block.get("type") == "tool_use":
2619
- tool_names.append(block.get("name", "unknown"))
2815
+ tool_fingerprints.append(_tool_call_fingerprint(block))
2816
+ # Extract target key for read-only dedup (Option 3)
2817
+ name = block.get("name", "unknown")
2818
+ inp = block.get("input", {})
2819
+ if isinstance(inp, dict):
2820
+ target = (
2821
+ inp.get("file_path")
2822
+ or inp.get("path")
2823
+ or inp.get("pattern")
2824
+ or inp.get("command", "")[:80]
2825
+ )
2826
+ if target:
2827
+ tool_targets[name] = str(target)
2620
2828
  break
2621
- if tool_names:
2622
- monitor.record_tool_calls(tool_names)
2623
- return "|".join(sorted(tool_names))
2829
+ if tool_fingerprints:
2830
+ fingerprint = "|".join(sorted(tool_fingerprints))
2831
+ monitor.record_tool_calls(
2832
+ [fp.split(":")[0] for fp in tool_fingerprints],
2833
+ tool_targets=tool_targets,
2834
+ fingerprint=fingerprint,
2835
+ )
2836
+ return fingerprint
2624
2837
  return ""
2625
2838
 
2626
2839
 
@@ -4581,6 +4794,34 @@ async def _apply_malformed_tool_guardrail(
4581
4794
  )
4582
4795
  current_issue = retry_issue
4583
4796
 
4797
+ # Option 2 (PR #154): When retries exhaust during review phase, reset to
4798
+ # bootstrap instead of returning guardrail fallback. This re-enables all
4799
+ # tools (including previously excluded cycling ones) and gives the model
4800
+ # a clean shot. The cycle detector will catch re-cycling if it recurs.
4801
+ if monitor.tool_turn_phase == "review":
4802
+ logger.warning(
4803
+ "TOOL RESPONSE review-phase reset: session=%s retries exhausted in review "
4804
+ "(kind=%s malformed=%d), resetting to bootstrap for fresh attempt",
4805
+ session_id,
4806
+ current_issue.kind or issue.kind,
4807
+ monitor.malformed_tool_streak,
4808
+ )
4809
+ monitor.reset_tool_turn_state(reason="review_retry_exhausted")
4810
+ monitor.malformed_tool_streak = 0
4811
+ monitor.invalid_tool_call_streak = 0
4812
+ # Return the best response we have — even if degraded — to keep
4813
+ # the conversation moving rather than returning a guardrail stub.
4814
+ degraded_text = _sanitize_tool_call_apology_text(
4815
+ _openai_message_text(working_resp)
4816
+ ).strip()
4817
+ if degraded_text and not _looks_malformed_tool_payload(degraded_text):
4818
+ return _build_safe_text_openai_response(
4819
+ working_resp, degraded_text, finish_reason="tool_calls",
4820
+ )
4821
+ return _build_clean_guardrail_openai_response(
4822
+ working_resp, finish_reason="tool_calls",
4823
+ )
4824
+
4584
4825
  logger.error(
4585
4826
  "TOOL RESPONSE issue persisted after retries (session=%s kind=%s malformed=%d invalid=%d required_miss=%d); returning clean guardrail response",
4586
4827
  session_id,
@@ -4722,16 +4963,20 @@ def _maybe_extract_text_tool_calls(openai_resp: dict) -> dict:
4722
4963
  return openai_resp
4723
4964
 
4724
4965
 
4725
- def _detect_and_truncate_degenerate_repetition(openai_resp: dict) -> dict:
4966
+ def _detect_and_truncate_degenerate_repetition(
4967
+ openai_resp: dict,
4968
+ ) -> tuple[dict, bool]:
4726
4969
  """Detect degenerate repetitive text and truncate at first repetition.
4727
4970
 
4728
4971
  When the model produces highly repetitive output (e.g. the same 20+ char
4729
4972
  substring repeated 10+ times), truncate at the first repetition boundary
4730
4973
  and set finish_reason to stop.
4974
+
4975
+ Returns (response, was_degenerate) so the caller can retry if needed.
4731
4976
  """
4732
4977
  text = _openai_message_text(openai_resp)
4733
4978
  if not text or len(text) < 200:
4734
- return openai_resp
4979
+ return openai_resp, False
4735
4980
 
4736
4981
  # Look for repeated substrings of length 20-100
4737
4982
  for substr_len in (60, 40, 20):
@@ -4760,8 +5005,70 @@ def _detect_and_truncate_degenerate_repetition(openai_resp: dict) -> dict:
4760
5005
  msg = choices[0].get("message", {})
4761
5006
  msg["content"] = truncated
4762
5007
  choices[0]["finish_reason"] = "stop"
4763
- return openai_resp
4764
- return openai_resp
5008
+ return openai_resp, True
5009
+ return openai_resp, False
5010
+
5011
+
5012
+ def _client_has_tool(anthropic_body: dict, tool_name: str) -> bool:
5013
+ """Check if the client's tool list contains a tool with the given name (case-insensitive)."""
5014
+ lower = tool_name.lower()
5015
+ return any(
5016
+ (t.get("name") or "").lower() == lower for t in anthropic_body.get("tools", [])
5017
+ )
5018
+
5019
+
5020
+ def _client_tool_name(anthropic_body: dict, tool_name: str) -> str:
5021
+ """Return the actual tool name as the client spells it (case-sensitive match)."""
5022
+ lower = tool_name.lower()
5023
+ for t in anthropic_body.get("tools", []):
5024
+ if (t.get("name") or "").lower() == lower:
5025
+ return t["name"]
5026
+ return tool_name
5027
+
5028
+
5029
+ def _inject_synthetic_continuation(
5030
+ anthropic_resp: dict, monitor: SessionMonitor, anthropic_body: dict
5031
+ ) -> dict:
5032
+ """Inject a synthetic tool_use into a finalize-turn response to keep the
5033
+ client's agentic loop alive.
5034
+
5035
+ Appends a no-op Read("/dev/null") tool_use block and changes stop_reason
5036
+ from "end_turn" to "tool_use" so the client continues sending requests.
5037
+ """
5038
+ # Pick a safe tool the client knows about (case-insensitive match,
5039
+ # then use the client's actual casing for the tool name)
5040
+ if _client_has_tool(anthropic_body, "read"):
5041
+ tool_name = _client_tool_name(anthropic_body, "read")
5042
+ tool_input = {"file_path": "/dev/null"}
5043
+ elif _client_has_tool(anthropic_body, "bash"):
5044
+ tool_name = _client_tool_name(anthropic_body, "bash")
5045
+ tool_input = {"command": "true", "description": "continuation ping"}
5046
+ else:
5047
+ logger.warning("FINALIZE CONTINUATION: no suitable tool found, skipping injection")
5048
+ return anthropic_resp
5049
+
5050
+ synthetic_id = f"toolu_{uuid.uuid4().hex[:12]}"
5051
+ monitor.finalize_synthetic_tool_id = synthetic_id
5052
+ monitor.finalize_continuation_count += 1
5053
+
5054
+ content = anthropic_resp.get("content", [])
5055
+ content.append({
5056
+ "type": "tool_use",
5057
+ "id": synthetic_id,
5058
+ "name": tool_name,
5059
+ "input": tool_input,
5060
+ })
5061
+ anthropic_resp["content"] = content
5062
+ anthropic_resp["stop_reason"] = "tool_use"
5063
+
5064
+ logger.info(
5065
+ "FINALIZE CONTINUATION: injected synthetic %s tool_use id=%s (count=%d/%d)",
5066
+ tool_name,
5067
+ synthetic_id,
5068
+ monitor.finalize_continuation_count,
5069
+ PROXY_FINALIZE_CONTINUATION_MAX,
5070
+ )
5071
+ return anthropic_resp
4765
5072
 
4766
5073
 
4767
5074
  def openai_to_anthropic_response(openai_resp: dict, model: str) -> dict:
@@ -5595,8 +5902,51 @@ async def messages(request: Request):
5595
5902
  session_id,
5596
5903
  )
5597
5904
 
5598
- openai_resp = _detect_and_truncate_degenerate_repetition(openai_resp)
5905
+ openai_resp, was_degenerate = _detect_and_truncate_degenerate_repetition(openai_resp)
5906
+ if was_degenerate:
5907
+ # Retry with constrained parameters to avoid degenerate output.
5908
+ # With tools: force tool_choice=required for a useful tool call.
5909
+ # Without tools (finalize): retry with capped max_tokens for clean text.
5910
+ has_tools = bool(strict_body.get("tools"))
5911
+ retry_body = dict(strict_body)
5912
+ retry_body["max_tokens"] = 2048
5913
+ retry_body["temperature"] = 0.1
5914
+ retry_body["stream"] = False
5915
+ if has_tools:
5916
+ retry_body["tool_choice"] = "required"
5917
+ logger.warning("DEGENERATE RETRY: retrying with tool_choice=required max_tokens=2048")
5918
+ else:
5919
+ logger.warning("DEGENERATE RETRY: retrying text-only with max_tokens=2048 temp=0.1")
5920
+ try:
5921
+ retry_resp = await _post_with_generation_timeout(
5922
+ client, f"{LLAMA_CPP_BASE}/chat/completions", retry_body,
5923
+ {"Content-Type": "application/json"},
5924
+ )
5925
+ if retry_resp.status_code == 200:
5926
+ retry_data = retry_resp.json()
5927
+ retry_text = _openai_message_text(retry_data)
5928
+ _, retry_degenerate = _detect_and_truncate_degenerate_repetition(retry_data)
5929
+ if retry_degenerate:
5930
+ logger.info("DEGENERATE RETRY: retry also degenerate, using truncated original")
5931
+ elif has_tools and (retry_data.get("choices", [{}])[0]
5932
+ .get("message", {}).get("tool_calls")):
5933
+ logger.info("DEGENERATE RETRY: success, got tool call")
5934
+ openai_resp = retry_data
5935
+ elif not has_tools and retry_text and len(retry_text) > 50:
5936
+ logger.info("DEGENERATE RETRY: success, got clean text (%d chars)", len(retry_text))
5937
+ openai_resp = retry_data
5938
+ else:
5939
+ logger.info("DEGENERATE RETRY: retry insufficient, using truncated original")
5940
+ except Exception as exc:
5941
+ logger.warning("DEGENERATE RETRY: failed: %s", exc)
5599
5942
  anthropic_resp = openai_to_anthropic_response(openai_resp, model)
5943
+ # FINALIZE CONTINUATION: inject synthetic tool_use to keep client loop alive
5944
+ if (
5945
+ monitor.finalize_turn_active
5946
+ and monitor.finalize_continuation_count < PROXY_FINALIZE_CONTINUATION_MAX
5947
+ and anthropic_resp.get("stop_reason") == "end_turn"
5948
+ ):
5949
+ anthropic_resp = _inject_synthetic_continuation(anthropic_resp, monitor, body)
5600
5950
  monitor.record_response(anthropic_resp.get("usage", {}).get("output_tokens", 0))
5601
5951
  # Update last_input_tokens from upstream's actual prompt_tokens
5602
5952
  upstream_input = anthropic_resp.get("usage", {}).get("input_tokens", 0)
@@ -5934,8 +6284,38 @@ async def messages(request: Request):
5934
6284
  monitor.invalid_tool_call_streak = 0
5935
6285
  monitor.required_tool_miss_streak = 0
5936
6286
 
5937
- openai_resp = _detect_and_truncate_degenerate_repetition(openai_resp)
6287
+ openai_resp, was_degenerate = _detect_and_truncate_degenerate_repetition(openai_resp)
6288
+ # Degenerate retry for non-guarded stream path
6289
+ if was_degenerate and openai_body.get("tools"):
6290
+ logger.warning("DEGENERATE RETRY (stream): retrying with tool_choice=required max_tokens=2048")
6291
+ retry_body = dict(openai_body)
6292
+ retry_body["tool_choice"] = "required"
6293
+ retry_body["max_tokens"] = 2048
6294
+ retry_body["temperature"] = 0.1
6295
+ retry_body["stream"] = False
6296
+ try:
6297
+ retry_resp = await _post_with_generation_timeout(
6298
+ client, f"{LLAMA_CPP_BASE}/chat/completions", retry_body,
6299
+ {"Content-Type": "application/json"},
6300
+ )
6301
+ if retry_resp.status_code == 200:
6302
+ retry_data = retry_resp.json()
6303
+ if (retry_data.get("choices", [{}])[0]
6304
+ .get("message", {}).get("tool_calls")):
6305
+ logger.info("DEGENERATE RETRY (stream): success, got tool call")
6306
+ openai_resp = retry_data
6307
+ else:
6308
+ logger.info("DEGENERATE RETRY (stream): no tool call, using truncated")
6309
+ except Exception as exc:
6310
+ logger.warning("DEGENERATE RETRY (stream): failed: %s", exc)
5938
6311
  anthropic_resp = openai_to_anthropic_response(openai_resp, model)
6312
+ # FINALIZE CONTINUATION: inject synthetic tool_use (non-guarded stream path)
6313
+ if (
6314
+ monitor.finalize_turn_active
6315
+ and monitor.finalize_continuation_count < PROXY_FINALIZE_CONTINUATION_MAX
6316
+ and anthropic_resp.get("stop_reason") == "end_turn"
6317
+ ):
6318
+ anthropic_resp = _inject_synthetic_continuation(anthropic_resp, monitor, body)
5939
6319
 
5940
6320
  # Track output tokens in session monitor
5941
6321
  output_tokens = anthropic_resp.get("usage", {}).get("output_tokens", 0)
@@ -1892,12 +1892,13 @@ class TestToolTurnControls(unittest.TestCase):
1892
1892
  monitor = proxy.SessionMonitor(context_window=262144)
1893
1893
  monitor.tool_turn_phase = "act"
1894
1894
  monitor.tool_state_forced_budget_remaining = 20
1895
+ # Use hash-format fingerprints to match _tool_call_fingerprint output
1895
1896
  monitor.tool_call_history = [
1896
- "Bash",
1897
+ "Bash:1e7b8d07",
1897
1898
  "TaskOutput",
1898
- "Bash",
1899
+ "Bash:1e7b8d07",
1899
1900
  "TaskOutput",
1900
- "Bash",
1901
+ "Bash:1e7b8d07",
1901
1902
  "TaskOutput",
1902
1903
  ]
1903
1904
  monitor.last_tool_fingerprint = "TaskOutput"
@@ -2076,7 +2077,9 @@ class TestToolTurnControls(unittest.TestCase):
2076
2077
  # Review phase now keeps required to prevent end-turn escape
2077
2078
  self.assertEqual(openai.get("tool_choice"), "required")
2078
2079
  self.assertEqual(monitor.tool_turn_phase, "review")
2079
- self.assertEqual(monitor.tool_state_review_cycles, 1)
2080
+ # review_cycles only increments when cycle_looping or stagnating,
2081
+ # not on mere budget exhaustion (model was working, not cycling)
2082
+ self.assertEqual(monitor.tool_state_review_cycles, 0)
2080
2083
  finally:
2081
2084
  setattr(proxy, "PROXY_TOOL_STATE_MACHINE", old_state)
2082
2085
  setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", old_min_msgs)
@@ -2242,7 +2245,11 @@ class TestToolTurnControls(unittest.TestCase):
2242
2245
  monitor = proxy.SessionMonitor(context_window=262144)
2243
2246
  monitor.tool_turn_phase = "act"
2244
2247
  monitor.tool_state_stagnation_streak = 4
2245
- monitor.tool_call_history = ["Bash", "TaskOutput", "Bash", "TaskOutput"]
2248
+ # Use hash-format fingerprints to match _tool_call_fingerprint output
2249
+ monitor.tool_call_history = [
2250
+ "Bash:1e7b8d07", "TaskOutput", "Bash:1e7b8d07", "TaskOutput",
2251
+ "Bash:1e7b8d07", "TaskOutput",
2252
+ ]
2246
2253
  monitor.last_tool_fingerprint = "TaskOutput"
2247
2254
 
2248
2255
  body = {
@@ -3262,8 +3269,11 @@ class TestCycleBreakOptions(unittest.TestCase):
3262
3269
  monitor = proxy.SessionMonitor(context_window=262144)
3263
3270
  monitor.tool_turn_phase = "act"
3264
3271
  monitor.tool_state_forced_budget_remaining = 20
3265
- monitor.tool_call_history = ["Bash", "Bash", "Bash", "Bash"]
3266
- monitor.last_tool_fingerprint = "Bash"
3272
+ # Hash-format fingerprints matching Bash+{"command":"ls"}
3273
+ monitor.tool_call_history = [
3274
+ "Bash:781c24ad", "Bash:781c24ad", "Bash:781c24ad", "Bash:781c24ad",
3275
+ ]
3276
+ monitor.last_tool_fingerprint = "Bash:781c24ad"
3267
3277
 
3268
3278
  body = {
3269
3279
  "model": "test",
@@ -3323,8 +3333,11 @@ class TestCycleBreakOptions(unittest.TestCase):
3323
3333
  monitor = proxy.SessionMonitor(context_window=262144)
3324
3334
  monitor.tool_turn_phase = "act"
3325
3335
  monitor.tool_state_forced_budget_remaining = 20
3326
- monitor.tool_call_history = ["Bash", "Bash", "Bash", "Bash"]
3327
- monitor.last_tool_fingerprint = "Bash"
3336
+ # Hash-format fingerprints matching Bash+{"command":"ls"}
3337
+ monitor.tool_call_history = [
3338
+ "Bash:781c24ad", "Bash:781c24ad", "Bash:781c24ad", "Bash:781c24ad",
3339
+ ]
3340
+ monitor.last_tool_fingerprint = "Bash:781c24ad"
3328
3341
 
3329
3342
  body = {
3330
3343
  "model": "test",
@@ -3369,9 +3382,9 @@ class TestCycleBreakOptions(unittest.TestCase):
3369
3382
  """Option 3: default forced budget reduced from 24 to 12."""
3370
3383
  self.assertEqual(proxy.PROXY_TOOL_STATE_FORCED_BUDGET, 12)
3371
3384
 
3372
- def test_review_cycle_limit_default_is_1(self):
3373
- """Option 4: default review cycle limit reduced from 2 to 1."""
3374
- self.assertEqual(proxy.PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT, 1)
3385
+ def test_review_cycle_limit_default_is_3(self):
3386
+ """Option 4: default review cycle limit is 3."""
3387
+ self.assertEqual(proxy.PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT, 3)
3375
3388
 
3376
3389
  def test_cycling_tool_names_cleared_on_reset(self):
3377
3390
  """cycling_tool_names is cleared when tool turn state resets."""
@@ -3450,8 +3463,9 @@ class TestDegenerateRepetitionDetection(unittest.TestCase):
3450
3463
  openai_resp = {
3451
3464
  "choices": [{"message": {"content": repeated}, "finish_reason": "length"}]
3452
3465
  }
3453
- result = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3466
+ result, truncated = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3454
3467
  truncated_text = result["choices"][0]["message"]["content"]
3468
+ self.assertTrue(truncated)
3455
3469
  self.assertLess(len(truncated_text), len(repeated))
3456
3470
  self.assertEqual(result["choices"][0]["finish_reason"], "stop")
3457
3471
 
@@ -3461,7 +3475,8 @@ class TestDegenerateRepetitionDetection(unittest.TestCase):
3461
3475
  openai_resp = {
3462
3476
  "choices": [{"message": {"content": text}, "finish_reason": "stop"}]
3463
3477
  }
3464
- result = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3478
+ result, truncated = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3479
+ self.assertFalse(truncated)
3465
3480
  self.assertEqual(result["choices"][0]["message"]["content"], text)
3466
3481
 
3467
3482
  def test_preserves_short_text(self):
@@ -3470,7 +3485,8 @@ class TestDegenerateRepetitionDetection(unittest.TestCase):
3470
3485
  openai_resp = {
3471
3486
  "choices": [{"message": {"content": text}, "finish_reason": "stop"}]
3472
3487
  }
3473
- result = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3488
+ result, truncated = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3489
+ self.assertFalse(truncated)
3474
3490
  self.assertEqual(result["choices"][0]["message"]["content"], text)
3475
3491
 
3476
3492
  def test_max_tokens_floor_skipped_for_non_tool_requests(self):
@@ -4178,3 +4194,215 @@ class TestFinalizePingPongFix(unittest.TestCase):
4178
4194
  proxy.TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE = old_compat
4179
4195
  proxy.PROXY_TOOL_CALL_GRAMMAR = old_flag
4180
4196
  proxy.TOOL_CALL_GBNF = old_gbnf
4197
+
4198
+
4199
+ class TestReviewPhaseBootstrapReset(unittest.TestCase):
4200
+ """Tests for bootstrap reset after exhausted retries in review phase (PR #154)."""
4201
+
4202
+ def _make_monitor_in_review(self):
4203
+ m = proxy.SessionMonitor()
4204
+ m.set_tool_turn_phase("review", reason="test")
4205
+ m.malformed_tool_streak = 3
4206
+ m.invalid_tool_call_streak = 0
4207
+ return m
4208
+
4209
+ def _make_monitor_in_act(self):
4210
+ m = proxy.SessionMonitor()
4211
+ m.set_tool_turn_phase("act", reason="test")
4212
+ m.malformed_tool_streak = 3
4213
+ return m
4214
+
4215
+ def test_review_phase_resets_to_bootstrap(self):
4216
+ """After retries are exhausted in review, the monitor resets to bootstrap."""
4217
+ m = self._make_monitor_in_review()
4218
+ self.assertEqual(m.tool_turn_phase, "review")
4219
+ self.assertEqual(m.malformed_tool_streak, 3)
4220
+
4221
+ # Simulate what happens after retry exhaustion: the code checks
4222
+ # monitor.tool_turn_phase == "review" and resets
4223
+ if m.tool_turn_phase == "review":
4224
+ m.reset_tool_turn_state(reason="review_retry_exhausted")
4225
+ m.malformed_tool_streak = 0
4226
+ m.invalid_tool_call_streak = 0
4227
+
4228
+ self.assertEqual(m.tool_turn_phase, "bootstrap")
4229
+ self.assertEqual(m.malformed_tool_streak, 0)
4230
+ self.assertEqual(m.tool_state_stagnation_streak, 0)
4231
+ self.assertEqual(m.cycling_tool_names, [])
4232
+
4233
+ def test_act_phase_does_not_reset(self):
4234
+ """In act phase, retry exhaustion should NOT trigger bootstrap reset."""
4235
+ m = self._make_monitor_in_act()
4236
+ # The bootstrap reset only triggers for review phase
4237
+ self.assertNotEqual(m.tool_turn_phase, "review")
4238
+ # In act phase, the normal guardrail fallback path runs instead
4239
+
4240
+
4241
+ class TestReadOnlyCycleClassExclusion(unittest.TestCase):
4242
+ """Tests for Option 1: read-only tool class exclusion on cycle break,
4243
+ Option 2: reduced cycle window (3), and Option 3: duplicate target dedup."""
4244
+
4245
+ def _make_body_with_tools(self, tool_names):
4246
+ """Build a minimal Anthropic body with named tools and a tool_result."""
4247
+ tools = [
4248
+ {"name": n, "description": f"{n} tool", "input_schema": {"type": "object"}}
4249
+ for n in tool_names
4250
+ ]
4251
+ return {
4252
+ "model": "test",
4253
+ "messages": [
4254
+ {"role": "user", "content": "do something"},
4255
+ {
4256
+ "role": "assistant",
4257
+ "content": [
4258
+ {
4259
+ "type": "tool_use",
4260
+ "id": "toolu_1",
4261
+ "name": tool_names[0],
4262
+ "input": {"file_path": "/some/file.ts"},
4263
+ }
4264
+ ],
4265
+ },
4266
+ {
4267
+ "role": "user",
4268
+ "content": [
4269
+ {"type": "tool_result", "tool_use_id": "toolu_1", "content": "ok"}
4270
+ ],
4271
+ },
4272
+ ],
4273
+ "tools": tools,
4274
+ }
4275
+
4276
+ def test_read_only_class_exclusion_expands(self):
4277
+ """When 'read' is cycling, all read-only tools are excluded, not just 'read'."""
4278
+ old_vals = {
4279
+ "PROXY_TOOL_STATE_MACHINE": getattr(proxy, "PROXY_TOOL_STATE_MACHINE"),
4280
+ "PROXY_TOOL_STATE_MIN_MESSAGES": getattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES"),
4281
+ "PROXY_TOOL_STATE_FORCED_BUDGET": getattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET"),
4282
+ "PROXY_TOOL_STATE_CYCLE_WINDOW": getattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW"),
4283
+ "PROXY_TOOL_STATE_STAGNATION_THRESHOLD": getattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD"),
4284
+ }
4285
+ try:
4286
+ setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
4287
+ setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
4288
+ setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 3)
4289
+ setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 3)
4290
+ setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 2)
4291
+
4292
+ all_tools = ["read", "glob", "grep", "bash", "write", "edit"]
4293
+ body = self._make_body_with_tools(all_tools)
4294
+ monitor = proxy.SessionMonitor(context_window=262144)
4295
+
4296
+ # Simulate cycling on 'read' by recording 3 identical fingerprints
4297
+ # Hash-format matching read+{"file_path":"/some/file.ts"}
4298
+ fp = "read:cfb28722"
4299
+ monitor.record_tool_calls(["read"], fingerprint=fp)
4300
+ monitor.record_tool_calls(["read"], fingerprint=fp)
4301
+ monitor.record_tool_calls(["read"], fingerprint=fp)
4302
+
4303
+ openai_body = proxy.build_openai_request(body, monitor)
4304
+
4305
+ # After cycle break, the tools in the body should exclude ALL
4306
+ # read-only tools, not just 'read'
4307
+ remaining_names = [
4308
+ t.get("function", {}).get("name") for t in openai_body.get("tools", [])
4309
+ ]
4310
+ self.assertNotIn("read", remaining_names)
4311
+ self.assertNotIn("glob", remaining_names)
4312
+ self.assertNotIn("grep", remaining_names)
4313
+ # Write/action tools should remain
4314
+ self.assertIn("bash", remaining_names)
4315
+ self.assertIn("write", remaining_names)
4316
+ self.assertIn("edit", remaining_names)
4317
+ finally:
4318
+ for k, v in old_vals.items():
4319
+ setattr(proxy, k, v)
4320
+
4321
+ def test_non_read_tool_cycling_no_class_expansion(self):
4322
+ """When 'bash' is cycling, only 'bash' is excluded, not read-only tools."""
4323
+ old_vals = {
4324
+ "PROXY_TOOL_STATE_MACHINE": getattr(proxy, "PROXY_TOOL_STATE_MACHINE"),
4325
+ "PROXY_TOOL_STATE_MIN_MESSAGES": getattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES"),
4326
+ "PROXY_TOOL_STATE_FORCED_BUDGET": getattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET"),
4327
+ "PROXY_TOOL_STATE_CYCLE_WINDOW": getattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW"),
4328
+ "PROXY_TOOL_STATE_STAGNATION_THRESHOLD": getattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD"),
4329
+ }
4330
+ try:
4331
+ setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
4332
+ setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
4333
+ setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 3)
4334
+ setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 3)
4335
+ setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 2)
4336
+
4337
+ all_tools = ["read", "glob", "grep", "bash", "write", "edit"]
4338
+ body = self._make_body_with_tools(all_tools)
4339
+ # Change the assistant tool_use to bash
4340
+ body["messages"][1]["content"][0]["name"] = "bash"
4341
+ body["messages"][1]["content"][0]["input"] = {"command": "ls"}
4342
+ monitor = proxy.SessionMonitor(context_window=262144)
4343
+
4344
+ # Use hash-format fingerprints matching bash+{"command":"ls"}
4345
+ fp = "bash:781c24ad"
4346
+ monitor.record_tool_calls(["bash"], fingerprint=fp)
4347
+ monitor.record_tool_calls(["bash"], fingerprint=fp)
4348
+ monitor.record_tool_calls(["bash"], fingerprint=fp)
4349
+
4350
+ openai_body = proxy.build_openai_request(body, monitor)
4351
+
4352
+ remaining_names = [
4353
+ t.get("function", {}).get("name") for t in openai_body.get("tools", [])
4354
+ ]
4355
+ self.assertNotIn("bash", remaining_names)
4356
+ # Read-only tools should still be available
4357
+ self.assertIn("read", remaining_names)
4358
+ self.assertIn("glob", remaining_names)
4359
+ self.assertIn("grep", remaining_names)
4360
+ finally:
4361
+ for k, v in old_vals.items():
4362
+ setattr(proxy, k, v)
4363
+
4364
+ def test_duplicate_read_target_triggers_early_cycle(self):
4365
+ """Option 3: reading same file 3+ times triggers early cycle break."""
4366
+ monitor = proxy.SessionMonitor(context_window=262144)
4367
+
4368
+ # Record 3 reads of same target
4369
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/path/to/file.ts"})
4370
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/path/to/file.ts"})
4371
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/path/to/file.ts"})
4372
+
4373
+ dup, tool = monitor.has_duplicate_read_target(threshold=3)
4374
+ self.assertTrue(dup)
4375
+ self.assertEqual(tool, "read")
4376
+
4377
+ def test_different_read_targets_no_duplicate(self):
4378
+ """Option 3: reading different files does NOT trigger duplicate detection."""
4379
+ monitor = proxy.SessionMonitor(context_window=262144)
4380
+
4381
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/path/a.ts"})
4382
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/path/b.ts"})
4383
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/path/c.ts"})
4384
+
4385
+ dup, _ = monitor.has_duplicate_read_target(threshold=3)
4386
+ self.assertFalse(dup)
4387
+
4388
+ def test_cycle_window_default_is_3(self):
4389
+ """Option 2: verify default cycle window is now 3."""
4390
+ # Verifies the env-var default fallback used to build the constant,
4391
+ self.assertEqual(
4392
+ int(proxy.os.environ.get("PROXY_TOOL_STATE_CYCLE_WINDOW", "3")), 3
4393
+ )
4394
+
4395
+ def test_target_history_reset_on_state_reset(self):
4396
+ """Target history is cleared when tool state resets."""
4397
+ monitor = proxy.SessionMonitor(context_window=262144)
4398
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/file.ts"})
4399
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/file.ts"})
4400
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/file.ts"})
4401
+
4402
+ dup, _ = monitor.has_duplicate_read_target(threshold=3)
4403
+ self.assertTrue(dup)
4404
+
4405
+ monitor.reset_tool_turn_state(reason="test_reset")
4406
+
4407
+ dup, _ = monitor.has_duplicate_read_target(threshold=3)
4408
+ self.assertFalse(dup)