@miller-tech/uap 1.20.24 → 1.20.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.20.24",
3
+ "version": "1.20.25",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -155,16 +155,16 @@ PROXY_TOOL_STATE_FORCED_BUDGET = int(
155
155
  )
156
156
  PROXY_TOOL_STATE_AUTO_BUDGET = int(os.environ.get("PROXY_TOOL_STATE_AUTO_BUDGET", "2"))
157
157
  PROXY_TOOL_STATE_STAGNATION_THRESHOLD = int(
158
- os.environ.get("PROXY_TOOL_STATE_STAGNATION_THRESHOLD", "5")
158
+ os.environ.get("PROXY_TOOL_STATE_STAGNATION_THRESHOLD", "8")
159
159
  )
160
160
  PROXY_TOOL_STATE_CYCLE_WINDOW = int(
161
- os.environ.get("PROXY_TOOL_STATE_CYCLE_WINDOW", "4")
161
+ os.environ.get("PROXY_TOOL_STATE_CYCLE_WINDOW", "3")
162
162
  )
163
163
  PROXY_TOOL_STATE_FINALIZE_THRESHOLD = int(
164
164
  os.environ.get("PROXY_TOOL_STATE_FINALIZE_THRESHOLD", "18")
165
165
  )
166
166
  PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT = int(
167
- os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "1")
167
+ os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "3")
168
168
  )
169
169
  PROXY_COMPLETION_RECOVERY_MAX = int(
170
170
  os.environ.get("PROXY_COMPLETION_RECOVERY_MAX", "3")
@@ -189,6 +189,12 @@ PROXY_TOOL_NARROWING_EXPAND_ON_LOOP = os.environ.get(
189
189
  "off",
190
190
  "no",
191
191
  }
192
+ # Read-only tools that should be excluded as a class when any one cycles
193
+ _READ_ONLY_TOOL_CLASS = frozenset({
194
+ "read", "glob", "grep", "Read", "Glob", "Grep",
195
+ "search", "Search", "list_files", "ListFiles",
196
+ })
197
+
192
198
  PROXY_GUARDRAIL_RETRY = os.environ.get("PROXY_GUARDRAIL_RETRY", "on").lower() not in {
193
199
  "0",
194
200
  "false",
@@ -196,6 +202,9 @@ PROXY_GUARDRAIL_RETRY = os.environ.get("PROXY_GUARDRAIL_RETRY", "on").lower() no
196
202
  "no",
197
203
  }
198
204
  PROXY_SESSION_TTL_SECS = int(os.environ.get("PROXY_SESSION_TTL_SECS", "7200"))
205
+ PROXY_FINALIZE_CONTINUATION_MAX = int(
206
+ os.environ.get("PROXY_FINALIZE_CONTINUATION_MAX", "3")
207
+ )
199
208
  PROXY_STREAM_REASONING_FALLBACK = (
200
209
  os.environ.get("PROXY_STREAM_REASONING_FALLBACK", "off").strip().lower()
201
210
  )
@@ -621,6 +630,9 @@ class SessionMonitor:
621
630
  tool_call_history: list = field(
622
631
  default_factory=list
623
632
  ) # Recent tool call fingerprints
633
+ tool_target_history: dict = field(
634
+ default_factory=dict
635
+ ) # {tool_name: {target: count}} for read-only dedup
624
636
  consecutive_forced_count: int = (
625
637
  0 # How many times tool_choice was forced consecutively
626
638
  )
@@ -646,6 +658,8 @@ class SessionMonitor:
646
658
  cycling_tool_names: list = field(default_factory=list)
647
659
  last_response_garbled: bool = False # previous turn had garbled/malformed output
648
660
  finalize_turn_active: bool = False
661
+ finalize_continuation_count: int = 0
662
+ finalize_synthetic_tool_id: str = ""
649
663
  completion_required: bool = False
650
664
  completion_pending: bool = False
651
665
  completion_verified: bool = False
@@ -753,14 +767,47 @@ class SessionMonitor:
753
767
 
754
768
  # --- Token Loop Protection Methods ---
755
769
 
756
- def record_tool_calls(self, tool_names: list[str]):
757
- """Record tool call names for loop detection."""
758
- fingerprint = "|".join(sorted(tool_names)) if tool_names else ""
759
- self.tool_call_history.append(fingerprint)
770
+ def record_tool_calls(
771
+ self,
772
+ tool_names: list[str],
773
+ tool_targets: dict[str, str] | None = None,
774
+ fingerprint: str = "",
775
+ ):
776
+ """Record tool call names for loop detection.
777
+
778
+ tool_targets: optional {tool_name: target_key} for read-only dedup.
779
+ e.g. {"read": "/path/to/file", "glob": "**/*.ts"}
780
+ If a pre-computed fingerprint (with argument hashes) is provided,
781
+ use it directly. Otherwise fall back to name-only fingerprint.
782
+ """
783
+ fp = fingerprint or ("|".join(sorted(tool_names)) if tool_names else "")
784
+ self.tool_call_history.append(fp)
760
785
  # Keep last 30 entries
761
786
  if len(self.tool_call_history) > 30:
762
787
  self.tool_call_history = self.tool_call_history[-30:]
763
788
 
789
+ # Track read-only tool targets for dedup (Option 3)
790
+ if tool_targets:
791
+ for name, target in tool_targets.items():
792
+ if name.lower() in {n.lower() for n in _READ_ONLY_TOOL_CLASS} and target:
793
+ by_tool = self.tool_target_history.setdefault(name, {})
794
+ by_tool[target] = by_tool.get(target, 0) + 1
795
+
796
def has_duplicate_read_target(self, threshold: int = 2) -> tuple[bool, str]:
    """Report whether any tracked tool has hit one target >= ``threshold`` times.

    Scans ``self.tool_target_history`` (``{tool_name: {target: count}}``) in
    insertion order.

    Returns:
        ``(True, tool_name)`` for the first tool that has at least one target
        whose count reaches ``threshold``; ``(False, "")`` otherwise.
    """
    for tool_name, targets in self.tool_target_history.items():
        # Only the counts matter here; the target keys themselves are not needed.
        if any(count >= threshold for count in targets.values()):
            return True, tool_name
    return False, ""
806
+
807
def reset_tool_targets(self):
    """Forget all recorded read-only targets.

    Rebinds ``self.tool_target_history`` to a new empty dict rather than
    clearing the existing one in place.
    """
    self.tool_target_history = dict()
810
+
764
811
  def detect_tool_loop(self, window: int = 6) -> tuple[bool, int]:
765
812
  """Detect if the model is stuck in a tool call loop.
766
813
 
@@ -851,6 +898,7 @@ class SessionMonitor:
851
898
  self.tool_state_review_cycles = 0
852
899
  self.cycling_tool_names = []
853
900
  self.last_tool_fingerprint = ""
901
+ self.reset_tool_targets()
854
902
 
855
903
  def update_completion_state(self, anthropic_body: dict, has_tool_results: bool):
856
904
  self.completion_required = _should_enforce_completion_contract(anthropic_body)
@@ -2095,6 +2143,8 @@ def _resolve_state_machine_tool_choice(
2095
2143
  monitor.invalid_tool_call_streak = 0
2096
2144
  monitor.required_tool_miss_streak = 0
2097
2145
  monitor.reset_tool_turn_state(reason="fresh_user_text")
2146
+ monitor.finalize_continuation_count = 0
2147
+ monitor.finalize_synthetic_tool_id = ""
2098
2148
  return None, "fresh_user_text"
2099
2149
 
2100
2150
  active_loop = (
@@ -2113,6 +2163,8 @@ def _resolve_state_machine_tool_choice(
2113
2163
  monitor.invalid_tool_call_streak = 0
2114
2164
  monitor.required_tool_miss_streak = 0
2115
2165
  monitor.reset_tool_turn_state(reason="inactive_loop")
2166
+ monitor.finalize_continuation_count = 0
2167
+ monitor.finalize_synthetic_tool_id = ""
2116
2168
  return None, "inactive_loop"
2117
2169
 
2118
2170
  if monitor.tool_turn_phase == "bootstrap":
@@ -2158,6 +2210,16 @@ def _resolve_state_machine_tool_choice(
2158
2210
  return "finalize", "review_cycle_limit"
2159
2211
 
2160
2212
  if monitor.tool_turn_phase == "act":
2213
+ # Option 3: Early cycle break when same read target is hit 3+ times
2214
+ dup_target, dup_tool = monitor.has_duplicate_read_target(threshold=3)
2215
+ if dup_target and not cycle_looping and not stagnating:
2216
+ cycle_looping = True
2217
+ cycle_repeat = 2
2218
+ logger.warning(
2219
+ "TOOL STATE MACHINE: duplicate read target detected for '%s', triggering early cycle break",
2220
+ dup_tool,
2221
+ )
2222
+
2161
2223
  if cycle_looping or stagnating:
2162
2224
  reason = "cycle_detected" if cycle_looping else "stagnation"
2163
2225
  monitor.set_tool_turn_phase("review", reason=reason)
@@ -2169,9 +2231,15 @@ def _resolve_state_machine_tool_choice(
2169
2231
  1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
2170
2232
  )
2171
2233
  # Capture which tools are cycling for narrowing/hint injection
2234
+ # Strip argument hashes (e.g. "glob:abc12345" -> "glob") so that
2235
+ # tool narrowing can match against actual tool names.
2172
2236
  window = max(2, PROXY_TOOL_STATE_CYCLE_WINDOW)
2173
2237
  recent = [fp for fp in monitor.tool_call_history[-window:] if fp]
2174
- monitor.cycling_tool_names = list(dict.fromkeys(recent))
2238
+ raw_names = []
2239
+ for fp in recent:
2240
+ for part in fp.split("|"):
2241
+ raw_names.append(part.split(":")[0])
2242
+ monitor.cycling_tool_names = list(dict.fromkeys(raw_names))
2175
2243
  logger.warning(
2176
2244
  "TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d cycling_tools=%s)",
2177
2245
  cycle_looping,
@@ -2184,7 +2252,11 @@ def _resolve_state_machine_tool_choice(
2184
2252
 
2185
2253
  if monitor.tool_state_forced_budget_remaining <= 0:
2186
2254
  monitor.set_tool_turn_phase("review", reason="forced_budget_exhausted")
2187
- monitor.tool_state_review_cycles += 1
2255
+ # Only count toward review cycle limit if there was an actual
2256
+ # cycle/stagnation detected. Budget exhaustion alone means the
2257
+ # model is working — it just used all its turns — not cycling.
2258
+ if cycle_looping or stagnating:
2259
+ monitor.tool_state_review_cycles += 1
2188
2260
  monitor.tool_state_auto_budget_remaining = max(
2189
2261
  1, PROXY_TOOL_STATE_AUTO_BUDGET
2190
2262
  )
@@ -2192,8 +2264,10 @@ def _resolve_state_machine_tool_choice(
2192
2264
  1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
2193
2265
  )
2194
2266
  logger.warning(
2195
- "TOOL STATE MACHINE: forced budget exhausted, entering review (cycles=%d)",
2267
+ "TOOL STATE MACHINE: forced budget exhausted, entering review (cycles=%d cycling=%s stagnating=%s)",
2196
2268
  monitor.tool_state_review_cycles,
2269
+ cycle_looping,
2270
+ stagnating,
2197
2271
  )
2198
2272
  return "required", "forced_budget_exhausted"
2199
2273
 
@@ -2206,6 +2280,14 @@ def _resolve_state_machine_tool_choice(
2206
2280
  monitor.tool_state_forced_budget_remaining = max(
2207
2281
  1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
2208
2282
  )
2283
+ # If stagnation cleared during review, the model tried a
2284
+ # different approach — reward by reducing cycle pressure.
2285
+ if monitor.tool_state_stagnation_streak == 0 and monitor.tool_state_review_cycles > 0:
2286
+ monitor.tool_state_review_cycles = max(0, monitor.tool_state_review_cycles - 1)
2287
+ logger.info(
2288
+ "TOOL STATE MACHINE: review_cycles decremented to %d (stagnation cleared)",
2289
+ monitor.tool_state_review_cycles,
2290
+ )
2209
2291
  return "required", "review_complete"
2210
2292
 
2211
2293
  monitor.tool_state_auto_budget_remaining -= 1
@@ -2416,6 +2498,9 @@ def build_openai_request(
2416
2498
  n_msgs = len(anthropic_body.get("messages", []))
2417
2499
  has_tool_results = _conversation_has_tool_results(anthropic_body)
2418
2500
 
2501
+ # Detect and strip synthetic finalize continuation before fingerprinting
2502
+ _detect_and_strip_synthetic_continuation(anthropic_body, monitor)
2503
+
2419
2504
  # Record tool calls from the last assistant message for loop detection
2420
2505
  latest_tool_fingerprint = _record_last_assistant_tool_calls(
2421
2506
  anthropic_body, monitor
@@ -2524,24 +2609,31 @@ def build_openai_request(
2524
2609
  cycling_names,
2525
2610
  )
2526
2611
  # Option 2: Narrow tools during review to exclude cycling tools
2612
+ # Option 1 enhancement: if any cycling tool is read-only, exclude
2613
+ # the entire read-only class to prevent tool-hopping (read→glob→grep)
2527
2614
  if (
2528
2615
  monitor.tool_turn_phase == "review"
2529
2616
  and monitor.cycling_tool_names
2530
2617
  and "tools" in openai_body
2531
2618
  ):
2619
+ exclude_set = set(monitor.cycling_tool_names)
2620
+ # Expand to full read-only class if any cycling tool is read-only
2621
+ if any(n.lower() in {c.lower() for c in _READ_ONLY_TOOL_CLASS} for n in exclude_set):
2622
+ exclude_set |= _READ_ONLY_TOOL_CLASS
2532
2623
  original_count = len(openai_body["tools"])
2533
2624
  narrowed = [
2534
2625
  t
2535
2626
  for t in openai_body["tools"]
2536
- if t.get("function", {}).get("name") not in monitor.cycling_tool_names
2627
+ if t.get("function", {}).get("name") not in exclude_set
2537
2628
  ]
2538
2629
  if narrowed:
2539
2630
  openai_body["tools"] = narrowed
2540
2631
  logger.warning(
2541
- "CYCLE BREAK: narrowed tools from %d to %d (excluded %s)",
2632
+ "CYCLE BREAK: narrowed tools from %d to %d (excluded %s, read_only_class=%s)",
2542
2633
  original_count,
2543
2634
  len(narrowed),
2544
2635
  monitor.cycling_tool_names,
2636
+ any(n.lower() in {c.lower() for c in _READ_ONLY_TOOL_CLASS} for n in monitor.cycling_tool_names),
2545
2637
  )
2546
2638
  else:
2547
2639
  logger.warning(
@@ -2602,13 +2694,117 @@ def build_openai_request(
2602
2694
  return openai_body
2603
2695
 
2604
2696
 
2697
+ def _tool_call_fingerprint(block: dict) -> str:
2698
+ """Create a fingerprint for a tool call that includes both name and a
2699
+ short hash of the arguments. This prevents false cycle detection when
2700
+ the same tool is called with different arguments (e.g. reading different
2701
+ files)."""
2702
+ name = block.get("name", "unknown")
2703
+ inp = block.get("input")
2704
+ if inp:
2705
+ arg_str = json.dumps(inp, sort_keys=True, separators=(",", ":"))
2706
+ arg_hash = hashlib.md5(arg_str.encode()).hexdigest()[:8]
2707
+ return f"{name}:{arg_hash}"
2708
+ return name
2709
+
2710
+
2711
def _detect_and_strip_synthetic_continuation(
    anthropic_body: dict, monitor: SessionMonitor
) -> bool:
    """Remove a previously injected synthetic continuation from the request.

    Looks at the most recent user message only. If its content is a list
    holding a ``tool_result`` whose ``tool_use_id`` equals
    ``monitor.finalize_synthetic_tool_id``, that result is removed (replaced
    by a short "continue" text block when nothing else remains) and the
    matching ``tool_use`` is removed from the most recent assistant message.
    ``anthropic_body["messages"]`` is mutated in place.

    On a hit the monitor's synthetic id and tool call history are cleared and
    ``reset_tool_turn_state`` / ``reset_completion_recovery`` are invoked.

    Returns True if a synthetic continuation was detected and handled.
    """
    marker_id = monitor.finalize_synthetic_tool_id
    if not marker_id:
        return False

    conversation = anthropic_body.get("messages", [])
    if not conversation:
        return False

    def _is_synthetic(block, block_type: str, id_field: str) -> bool:
        # True for a dict block of the given type that carries the synthetic id.
        return (
            isinstance(block, dict)
            and block.get("type") == block_type
            and block.get(id_field) == marker_id
        )

    # Only the latest user message is ever inspected.
    latest_user = next(
        (m for m in reversed(conversation) if m.get("role") == "user"), None
    )
    if latest_user is None:
        return False

    user_blocks = latest_user.get("content")
    if not isinstance(user_blocks, list):
        return False
    if not any(_is_synthetic(b, "tool_result", "tool_use_id") for b in user_blocks):
        return False

    # Drop the synthetic tool_result; never leave the user turn empty.
    kept_blocks = [
        b for b in user_blocks if not _is_synthetic(b, "tool_result", "tool_use_id")
    ]
    if kept_blocks:
        latest_user["content"] = kept_blocks
    else:
        latest_user["content"] = [
            {"type": "text", "text": "Continue working on the task."}
        ]

    # Drop the synthetic tool_use from the latest assistant message, if any.
    latest_assistant = next(
        (m for m in reversed(conversation) if m.get("role") == "assistant"), None
    )
    if latest_assistant is not None:
        assistant_blocks = latest_assistant.get("content")
        if isinstance(assistant_blocks, list):
            latest_assistant["content"] = [
                b for b in assistant_blocks if not _is_synthetic(b, "tool_use", "id")
            ]

    # Reset state machine for fresh act cycle
    monitor.finalize_synthetic_tool_id = ""
    monitor.reset_tool_turn_state(reason="finalize_continuation_resume")
    monitor.reset_completion_recovery()
    monitor.tool_call_history = []
    logger.info(
        "FINALIZE CONTINUATION: stripped synthetic tool id=%s, "
        "reset state machine for fresh act cycle (continuations=%d/%d)",
        marker_id,
        monitor.finalize_continuation_count,
        PROXY_FINALIZE_CONTINUATION_MAX,
    )
    return True
2794
+
2795
+
2605
2796
  def _record_last_assistant_tool_calls(
2606
2797
  anthropic_body: dict, monitor: SessionMonitor
2607
2798
  ) -> str:
2608
2799
  """Extract tool call names from the last assistant message and record
2609
- them in the session monitor for loop detection."""
2800
+ them in the session monitor for loop detection.
2801
+
2802
+ Fingerprints now include an argument hash so that the same tool called
2803
+ with different arguments (e.g. read(file_a) vs read(file_b)) produces
2804
+ distinct fingerprints, preventing false cycle/stagnation detection."""
2610
2805
  messages = anthropic_body.get("messages", [])
2611
- tool_names = []
2806
+ tool_fingerprints = []
2807
+ tool_targets: dict[str, str] = {}
2612
2808
  for msg in reversed(messages):
2613
2809
  if msg.get("role") != "assistant":
2614
2810
  continue
@@ -2616,11 +2812,28 @@ def _record_last_assistant_tool_calls(
2616
2812
  if isinstance(content, list):
2617
2813
  for block in content:
2618
2814
  if isinstance(block, dict) and block.get("type") == "tool_use":
2619
- tool_names.append(block.get("name", "unknown"))
2815
+ tool_fingerprints.append(_tool_call_fingerprint(block))
2816
+ # Extract target key for read-only dedup (Option 3)
2817
+ name = block.get("name", "unknown")
2818
+ inp = block.get("input", {})
2819
+ if isinstance(inp, dict):
2820
+ target = (
2821
+ inp.get("file_path")
2822
+ or inp.get("path")
2823
+ or inp.get("pattern")
2824
+ or inp.get("command", "")[:80]
2825
+ )
2826
+ if target:
2827
+ tool_targets[name] = str(target)
2620
2828
  break
2621
- if tool_names:
2622
- monitor.record_tool_calls(tool_names)
2623
- return "|".join(sorted(tool_names))
2829
+ if tool_fingerprints:
2830
+ fingerprint = "|".join(sorted(tool_fingerprints))
2831
+ monitor.record_tool_calls(
2832
+ [fp.split(":")[0] for fp in tool_fingerprints],
2833
+ tool_targets=tool_targets,
2834
+ fingerprint=fingerprint,
2835
+ )
2836
+ return fingerprint
2624
2837
  return ""
2625
2838
 
2626
2839
 
@@ -4750,16 +4963,20 @@ def _maybe_extract_text_tool_calls(openai_resp: dict) -> dict:
4750
4963
  return openai_resp
4751
4964
 
4752
4965
 
4753
- def _detect_and_truncate_degenerate_repetition(openai_resp: dict) -> dict:
4966
+ def _detect_and_truncate_degenerate_repetition(
4967
+ openai_resp: dict,
4968
+ ) -> tuple[dict, bool]:
4754
4969
  """Detect degenerate repetitive text and truncate at first repetition.
4755
4970
 
4756
4971
  When the model produces highly repetitive output (e.g. the same 20+ char
4757
4972
  substring repeated 10+ times), truncate at the first repetition boundary
4758
4973
  and set finish_reason to stop.
4974
+
4975
+ Returns (response, was_degenerate) so the caller can retry if needed.
4759
4976
  """
4760
4977
  text = _openai_message_text(openai_resp)
4761
4978
  if not text or len(text) < 200:
4762
- return openai_resp
4979
+ return openai_resp, False
4763
4980
 
4764
4981
  # Look for repeated substrings of length 20-100
4765
4982
  for substr_len in (60, 40, 20):
@@ -4788,8 +5005,70 @@ def _detect_and_truncate_degenerate_repetition(openai_resp: dict) -> dict:
4788
5005
  msg = choices[0].get("message", {})
4789
5006
  msg["content"] = truncated
4790
5007
  choices[0]["finish_reason"] = "stop"
4791
- return openai_resp
4792
- return openai_resp
5008
+ return openai_resp, True
5009
+ return openai_resp, False
5010
+
5011
+
5012
+ def _client_has_tool(anthropic_body: dict, tool_name: str) -> bool:
5013
+ """Check if the client's tool list contains a tool with the given name (case-insensitive)."""
5014
+ lower = tool_name.lower()
5015
+ return any(
5016
+ (t.get("name") or "").lower() == lower for t in anthropic_body.get("tools", [])
5017
+ )
5018
+
5019
+
5020
+ def _client_tool_name(anthropic_body: dict, tool_name: str) -> str:
5021
+ """Return the actual tool name as the client spells it (case-sensitive match)."""
5022
+ lower = tool_name.lower()
5023
+ for t in anthropic_body.get("tools", []):
5024
+ if (t.get("name") or "").lower() == lower:
5025
+ return t["name"]
5026
+ return tool_name
5027
+
5028
+
5029
def _inject_synthetic_continuation(
    anthropic_resp: dict, monitor: SessionMonitor, anthropic_body: dict
) -> dict:
    """Append a synthetic no-op ``tool_use`` block to a response.

    The intent, per the call sites, is to keep the client's agentic loop
    alive after a finalize turn. The block uses the client's "read" tool on
    ``/dev/null`` when the client declares one (matched case-insensitively,
    emitted with the client's casing), otherwise its "bash" tool running
    ``true``. If the client declares neither, the response is returned
    untouched.

    Side effects on success:
        * ``monitor.finalize_synthetic_tool_id`` is set to the new block id.
        * ``monitor.finalize_continuation_count`` is incremented.
        * ``anthropic_resp`` is mutated in place: the block is appended to
          ``content`` and ``stop_reason`` is set to ``"tool_use"``.

    Returns:
        The same ``anthropic_resp`` object.
    """
    # Pick a safe tool the client knows about (case-insensitive match,
    # then use the client's actual casing for the tool name)
    if _client_has_tool(anthropic_body, "read"):
        tool_name = _client_tool_name(anthropic_body, "read")
        tool_input = {"file_path": "/dev/null"}
    elif _client_has_tool(anthropic_body, "bash"):
        tool_name = _client_tool_name(anthropic_body, "bash")
        tool_input = {"command": "true", "description": "continuation ping"}
    else:
        logger.warning("FINALIZE CONTINUATION: no suitable tool found, skipping injection")
        return anthropic_resp

    synthetic_id = f"toolu_{uuid.uuid4().hex[:12]}"
    monitor.finalize_synthetic_tool_id = synthetic_id
    monitor.finalize_continuation_count += 1

    # Normalise content to a list of blocks before appending: the key may be
    # present with None or a plain string, neither of which supports .append.
    content = anthropic_resp.get("content")
    if isinstance(content, str):
        content = [{"type": "text", "text": content}] if content else []
    elif not isinstance(content, list):
        content = []
    content.append({
        "type": "tool_use",
        "id": synthetic_id,
        "name": tool_name,
        "input": tool_input,
    })
    anthropic_resp["content"] = content
    anthropic_resp["stop_reason"] = "tool_use"

    logger.info(
        "FINALIZE CONTINUATION: injected synthetic %s tool_use id=%s (count=%d/%d)",
        tool_name,
        synthetic_id,
        monitor.finalize_continuation_count,
        PROXY_FINALIZE_CONTINUATION_MAX,
    )
    return anthropic_resp
4793
5072
 
4794
5073
 
4795
5074
  def openai_to_anthropic_response(openai_resp: dict, model: str) -> dict:
@@ -5623,8 +5902,51 @@ async def messages(request: Request):
5623
5902
  session_id,
5624
5903
  )
5625
5904
 
5626
- openai_resp = _detect_and_truncate_degenerate_repetition(openai_resp)
5905
+ openai_resp, was_degenerate = _detect_and_truncate_degenerate_repetition(openai_resp)
5906
+ if was_degenerate:
5907
+ # Retry with constrained parameters to avoid degenerate output.
5908
+ # With tools: force tool_choice=required for a useful tool call.
5909
+ # Without tools (finalize): retry with capped max_tokens for clean text.
5910
+ has_tools = bool(strict_body.get("tools"))
5911
+ retry_body = dict(strict_body)
5912
+ retry_body["max_tokens"] = 2048
5913
+ retry_body["temperature"] = 0.1
5914
+ retry_body["stream"] = False
5915
+ if has_tools:
5916
+ retry_body["tool_choice"] = "required"
5917
+ logger.warning("DEGENERATE RETRY: retrying with tool_choice=required max_tokens=2048")
5918
+ else:
5919
+ logger.warning("DEGENERATE RETRY: retrying text-only with max_tokens=2048 temp=0.1")
5920
+ try:
5921
+ retry_resp = await _post_with_generation_timeout(
5922
+ client, f"{LLAMA_CPP_BASE}/chat/completions", retry_body,
5923
+ {"Content-Type": "application/json"},
5924
+ )
5925
+ if retry_resp.status_code == 200:
5926
+ retry_data = retry_resp.json()
5927
+ retry_text = _openai_message_text(retry_data)
5928
+ _, retry_degenerate = _detect_and_truncate_degenerate_repetition(retry_data)
5929
+ if retry_degenerate:
5930
+ logger.info("DEGENERATE RETRY: retry also degenerate, using truncated original")
5931
+ elif has_tools and (retry_data.get("choices", [{}])[0]
5932
+ .get("message", {}).get("tool_calls")):
5933
+ logger.info("DEGENERATE RETRY: success, got tool call")
5934
+ openai_resp = retry_data
5935
+ elif not has_tools and retry_text and len(retry_text) > 50:
5936
+ logger.info("DEGENERATE RETRY: success, got clean text (%d chars)", len(retry_text))
5937
+ openai_resp = retry_data
5938
+ else:
5939
+ logger.info("DEGENERATE RETRY: retry insufficient, using truncated original")
5940
+ except Exception as exc:
5941
+ logger.warning("DEGENERATE RETRY: failed: %s", exc)
5627
5942
  anthropic_resp = openai_to_anthropic_response(openai_resp, model)
5943
+ # FINALIZE CONTINUATION: inject synthetic tool_use to keep client loop alive
5944
+ if (
5945
+ monitor.finalize_turn_active
5946
+ and monitor.finalize_continuation_count < PROXY_FINALIZE_CONTINUATION_MAX
5947
+ and anthropic_resp.get("stop_reason") == "end_turn"
5948
+ ):
5949
+ anthropic_resp = _inject_synthetic_continuation(anthropic_resp, monitor, body)
5628
5950
  monitor.record_response(anthropic_resp.get("usage", {}).get("output_tokens", 0))
5629
5951
  # Update last_input_tokens from upstream's actual prompt_tokens
5630
5952
  upstream_input = anthropic_resp.get("usage", {}).get("input_tokens", 0)
@@ -5962,8 +6284,38 @@ async def messages(request: Request):
5962
6284
  monitor.invalid_tool_call_streak = 0
5963
6285
  monitor.required_tool_miss_streak = 0
5964
6286
 
5965
- openai_resp = _detect_and_truncate_degenerate_repetition(openai_resp)
6287
+ openai_resp, was_degenerate = _detect_and_truncate_degenerate_repetition(openai_resp)
6288
+ # Degenerate retry for non-guarded stream path
6289
+ if was_degenerate and openai_body.get("tools"):
6290
+ logger.warning("DEGENERATE RETRY (stream): retrying with tool_choice=required max_tokens=2048")
6291
+ retry_body = dict(openai_body)
6292
+ retry_body["tool_choice"] = "required"
6293
+ retry_body["max_tokens"] = 2048
6294
+ retry_body["temperature"] = 0.1
6295
+ retry_body["stream"] = False
6296
+ try:
6297
+ retry_resp = await _post_with_generation_timeout(
6298
+ client, f"{LLAMA_CPP_BASE}/chat/completions", retry_body,
6299
+ {"Content-Type": "application/json"},
6300
+ )
6301
+ if retry_resp.status_code == 200:
6302
+ retry_data = retry_resp.json()
6303
+ if (retry_data.get("choices", [{}])[0]
6304
+ .get("message", {}).get("tool_calls")):
6305
+ logger.info("DEGENERATE RETRY (stream): success, got tool call")
6306
+ openai_resp = retry_data
6307
+ else:
6308
+ logger.info("DEGENERATE RETRY (stream): no tool call, using truncated")
6309
+ except Exception as exc:
6310
+ logger.warning("DEGENERATE RETRY (stream): failed: %s", exc)
5966
6311
  anthropic_resp = openai_to_anthropic_response(openai_resp, model)
6312
+ # FINALIZE CONTINUATION: inject synthetic tool_use (non-guarded stream path)
6313
+ if (
6314
+ monitor.finalize_turn_active
6315
+ and monitor.finalize_continuation_count < PROXY_FINALIZE_CONTINUATION_MAX
6316
+ and anthropic_resp.get("stop_reason") == "end_turn"
6317
+ ):
6318
+ anthropic_resp = _inject_synthetic_continuation(anthropic_resp, monitor, body)
5967
6319
 
5968
6320
  # Track output tokens in session monitor
5969
6321
  output_tokens = anthropic_resp.get("usage", {}).get("output_tokens", 0)
@@ -1892,12 +1892,13 @@ class TestToolTurnControls(unittest.TestCase):
1892
1892
  monitor = proxy.SessionMonitor(context_window=262144)
1893
1893
  monitor.tool_turn_phase = "act"
1894
1894
  monitor.tool_state_forced_budget_remaining = 20
1895
+ # Use hash-format fingerprints to match _tool_call_fingerprint output
1895
1896
  monitor.tool_call_history = [
1896
- "Bash",
1897
+ "Bash:1e7b8d07",
1897
1898
  "TaskOutput",
1898
- "Bash",
1899
+ "Bash:1e7b8d07",
1899
1900
  "TaskOutput",
1900
- "Bash",
1901
+ "Bash:1e7b8d07",
1901
1902
  "TaskOutput",
1902
1903
  ]
1903
1904
  monitor.last_tool_fingerprint = "TaskOutput"
@@ -2076,7 +2077,9 @@ class TestToolTurnControls(unittest.TestCase):
2076
2077
  # Review phase now keeps required to prevent end-turn escape
2077
2078
  self.assertEqual(openai.get("tool_choice"), "required")
2078
2079
  self.assertEqual(monitor.tool_turn_phase, "review")
2079
- self.assertEqual(monitor.tool_state_review_cycles, 1)
2080
+ # review_cycles only increments when cycle_looping or stagnating,
2081
+ # not on mere budget exhaustion (model was working, not cycling)
2082
+ self.assertEqual(monitor.tool_state_review_cycles, 0)
2080
2083
  finally:
2081
2084
  setattr(proxy, "PROXY_TOOL_STATE_MACHINE", old_state)
2082
2085
  setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", old_min_msgs)
@@ -2242,7 +2245,11 @@ class TestToolTurnControls(unittest.TestCase):
2242
2245
  monitor = proxy.SessionMonitor(context_window=262144)
2243
2246
  monitor.tool_turn_phase = "act"
2244
2247
  monitor.tool_state_stagnation_streak = 4
2245
- monitor.tool_call_history = ["Bash", "TaskOutput", "Bash", "TaskOutput"]
2248
+ # Use hash-format fingerprints to match _tool_call_fingerprint output
2249
+ monitor.tool_call_history = [
2250
+ "Bash:1e7b8d07", "TaskOutput", "Bash:1e7b8d07", "TaskOutput",
2251
+ "Bash:1e7b8d07", "TaskOutput",
2252
+ ]
2246
2253
  monitor.last_tool_fingerprint = "TaskOutput"
2247
2254
 
2248
2255
  body = {
@@ -3262,8 +3269,11 @@ class TestCycleBreakOptions(unittest.TestCase):
3262
3269
  monitor = proxy.SessionMonitor(context_window=262144)
3263
3270
  monitor.tool_turn_phase = "act"
3264
3271
  monitor.tool_state_forced_budget_remaining = 20
3265
- monitor.tool_call_history = ["Bash", "Bash", "Bash", "Bash"]
3266
- monitor.last_tool_fingerprint = "Bash"
3272
+ # Hash-format fingerprints matching Bash+{"command":"ls"}
3273
+ monitor.tool_call_history = [
3274
+ "Bash:781c24ad", "Bash:781c24ad", "Bash:781c24ad", "Bash:781c24ad",
3275
+ ]
3276
+ monitor.last_tool_fingerprint = "Bash:781c24ad"
3267
3277
 
3268
3278
  body = {
3269
3279
  "model": "test",
@@ -3323,8 +3333,11 @@ class TestCycleBreakOptions(unittest.TestCase):
3323
3333
  monitor = proxy.SessionMonitor(context_window=262144)
3324
3334
  monitor.tool_turn_phase = "act"
3325
3335
  monitor.tool_state_forced_budget_remaining = 20
3326
- monitor.tool_call_history = ["Bash", "Bash", "Bash", "Bash"]
3327
- monitor.last_tool_fingerprint = "Bash"
3336
+ # Hash-format fingerprints matching Bash+{"command":"ls"}
3337
+ monitor.tool_call_history = [
3338
+ "Bash:781c24ad", "Bash:781c24ad", "Bash:781c24ad", "Bash:781c24ad",
3339
+ ]
3340
+ monitor.last_tool_fingerprint = "Bash:781c24ad"
3328
3341
 
3329
3342
  body = {
3330
3343
  "model": "test",
@@ -3369,9 +3382,9 @@ class TestCycleBreakOptions(unittest.TestCase):
3369
3382
  """Option 3: default forced budget reduced from 24 to 12."""
3370
3383
  self.assertEqual(proxy.PROXY_TOOL_STATE_FORCED_BUDGET, 12)
3371
3384
 
3372
- def test_review_cycle_limit_default_is_1(self):
3373
- """Option 4: default review cycle limit reduced from 2 to 1."""
3374
- self.assertEqual(proxy.PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT, 1)
3385
+ def test_review_cycle_limit_default_is_3(self):
3386
+ """Option 4: default review cycle limit is 3."""
3387
+ self.assertEqual(proxy.PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT, 3)
3375
3388
 
3376
3389
  def test_cycling_tool_names_cleared_on_reset(self):
3377
3390
  """cycling_tool_names is cleared when tool turn state resets."""
@@ -3450,8 +3463,9 @@ class TestDegenerateRepetitionDetection(unittest.TestCase):
3450
3463
  openai_resp = {
3451
3464
  "choices": [{"message": {"content": repeated}, "finish_reason": "length"}]
3452
3465
  }
3453
- result = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3466
+ result, truncated = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3454
3467
  truncated_text = result["choices"][0]["message"]["content"]
3468
+ self.assertTrue(truncated)
3455
3469
  self.assertLess(len(truncated_text), len(repeated))
3456
3470
  self.assertEqual(result["choices"][0]["finish_reason"], "stop")
3457
3471
 
@@ -3461,7 +3475,8 @@ class TestDegenerateRepetitionDetection(unittest.TestCase):
3461
3475
  openai_resp = {
3462
3476
  "choices": [{"message": {"content": text}, "finish_reason": "stop"}]
3463
3477
  }
3464
- result = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3478
+ result, truncated = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3479
+ self.assertFalse(truncated)
3465
3480
  self.assertEqual(result["choices"][0]["message"]["content"], text)
3466
3481
 
3467
3482
  def test_preserves_short_text(self):
@@ -3470,7 +3485,8 @@ class TestDegenerateRepetitionDetection(unittest.TestCase):
3470
3485
  openai_resp = {
3471
3486
  "choices": [{"message": {"content": text}, "finish_reason": "stop"}]
3472
3487
  }
3473
- result = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3488
+ result, truncated = proxy._detect_and_truncate_degenerate_repetition(openai_resp)
3489
+ self.assertFalse(truncated)
3474
3490
  self.assertEqual(result["choices"][0]["message"]["content"], text)
3475
3491
 
3476
3492
  def test_max_tokens_floor_skipped_for_non_tool_requests(self):
@@ -4220,3 +4236,173 @@ class TestReviewPhaseBootstrapReset(unittest.TestCase):
4220
4236
  # The bootstrap reset only triggers for review phase
4221
4237
  self.assertNotEqual(m.tool_turn_phase, "review")
4222
4238
  # In act phase, the normal guardrail fallback path runs instead
4239
+
4240
+
4241
+ class TestReadOnlyCycleClassExclusion(unittest.TestCase):
4242
+ """Tests for Option 1: read-only tool class exclusion on cycle break,
4243
+ Option 2: reduced cycle window (3), and Option 3: duplicate target dedup."""
4244
+
4245
+ def _make_body_with_tools(self, tool_names):
4246
+ """Build a minimal Anthropic body with named tools and a tool_result."""
4247
+ tools = [
4248
+ {"name": n, "description": f"{n} tool", "input_schema": {"type": "object"}}
4249
+ for n in tool_names
4250
+ ]
4251
+ return {
4252
+ "model": "test",
4253
+ "messages": [
4254
+ {"role": "user", "content": "do something"},
4255
+ {
4256
+ "role": "assistant",
4257
+ "content": [
4258
+ {
4259
+ "type": "tool_use",
4260
+ "id": "toolu_1",
4261
+ "name": tool_names[0],
4262
+ "input": {"file_path": "/some/file.ts"},
4263
+ }
4264
+ ],
4265
+ },
4266
+ {
4267
+ "role": "user",
4268
+ "content": [
4269
+ {"type": "tool_result", "tool_use_id": "toolu_1", "content": "ok"}
4270
+ ],
4271
+ },
4272
+ ],
4273
+ "tools": tools,
4274
+ }
4275
+
4276
+ def test_read_only_class_exclusion_expands(self):
4277
+ """When 'read' is cycling, all read-only tools are excluded, not just 'read'."""
4278
+ old_vals = {
4279
+ "PROXY_TOOL_STATE_MACHINE": getattr(proxy, "PROXY_TOOL_STATE_MACHINE"),
4280
+ "PROXY_TOOL_STATE_MIN_MESSAGES": getattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES"),
4281
+ "PROXY_TOOL_STATE_FORCED_BUDGET": getattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET"),
4282
+ "PROXY_TOOL_STATE_CYCLE_WINDOW": getattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW"),
4283
+ "PROXY_TOOL_STATE_STAGNATION_THRESHOLD": getattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD"),
4284
+ }
4285
+ try:
4286
+ setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
4287
+ setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
4288
+ setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 3)
4289
+ setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 3)
4290
+ setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 2)
4291
+
4292
+ all_tools = ["read", "glob", "grep", "bash", "write", "edit"]
4293
+ body = self._make_body_with_tools(all_tools)
4294
+ monitor = proxy.SessionMonitor(context_window=262144)
4295
+
4296
+ # Simulate cycling on 'read' by recording 3 identical fingerprints
4297
+ # Hash-format matching read+{"file_path":"/some/file.ts"}
4298
+ fp = "read:cfb28722"
4299
+ monitor.record_tool_calls(["read"], fingerprint=fp)
4300
+ monitor.record_tool_calls(["read"], fingerprint=fp)
4301
+ monitor.record_tool_calls(["read"], fingerprint=fp)
4302
+
4303
+ openai_body = proxy.build_openai_request(body, monitor)
4304
+
4305
+ # After cycle break, the tools in the body should exclude ALL
4306
+ # read-only tools, not just 'read'
4307
+ remaining_names = [
4308
+ t.get("function", {}).get("name") for t in openai_body.get("tools", [])
4309
+ ]
4310
+ self.assertNotIn("read", remaining_names)
4311
+ self.assertNotIn("glob", remaining_names)
4312
+ self.assertNotIn("grep", remaining_names)
4313
+ # Write/action tools should remain
4314
+ self.assertIn("bash", remaining_names)
4315
+ self.assertIn("write", remaining_names)
4316
+ self.assertIn("edit", remaining_names)
4317
+ finally:
4318
+ for k, v in old_vals.items():
4319
+ setattr(proxy, k, v)
4320
+
4321
+ def test_non_read_tool_cycling_no_class_expansion(self):
4322
+ """When 'bash' is cycling, only 'bash' is excluded, not read-only tools."""
4323
+ old_vals = {
4324
+ "PROXY_TOOL_STATE_MACHINE": getattr(proxy, "PROXY_TOOL_STATE_MACHINE"),
4325
+ "PROXY_TOOL_STATE_MIN_MESSAGES": getattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES"),
4326
+ "PROXY_TOOL_STATE_FORCED_BUDGET": getattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET"),
4327
+ "PROXY_TOOL_STATE_CYCLE_WINDOW": getattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW"),
4328
+ "PROXY_TOOL_STATE_STAGNATION_THRESHOLD": getattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD"),
4329
+ }
4330
+ try:
4331
+ setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
4332
+ setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
4333
+ setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 3)
4334
+ setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 3)
4335
+ setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 2)
4336
+
4337
+ all_tools = ["read", "glob", "grep", "bash", "write", "edit"]
4338
+ body = self._make_body_with_tools(all_tools)
4339
+ # Change the assistant tool_use to bash
4340
+ body["messages"][1]["content"][0]["name"] = "bash"
4341
+ body["messages"][1]["content"][0]["input"] = {"command": "ls"}
4342
+ monitor = proxy.SessionMonitor(context_window=262144)
4343
+
4344
+ # Use hash-format fingerprints matching bash+{"command":"ls"}
4345
+ fp = "bash:781c24ad"
4346
+ monitor.record_tool_calls(["bash"], fingerprint=fp)
4347
+ monitor.record_tool_calls(["bash"], fingerprint=fp)
4348
+ monitor.record_tool_calls(["bash"], fingerprint=fp)
4349
+
4350
+ openai_body = proxy.build_openai_request(body, monitor)
4351
+
4352
+ remaining_names = [
4353
+ t.get("function", {}).get("name") for t in openai_body.get("tools", [])
4354
+ ]
4355
+ self.assertNotIn("bash", remaining_names)
4356
+ # Read-only tools should still be available
4357
+ self.assertIn("read", remaining_names)
4358
+ self.assertIn("glob", remaining_names)
4359
+ self.assertIn("grep", remaining_names)
4360
+ finally:
4361
+ for k, v in old_vals.items():
4362
+ setattr(proxy, k, v)
4363
+
4364
+ def test_duplicate_read_target_triggers_early_cycle(self):
4365
+ """Option 3: reading same file 3+ times triggers early cycle break."""
4366
+ monitor = proxy.SessionMonitor(context_window=262144)
4367
+
4368
+ # Record 3 reads of same target
4369
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/path/to/file.ts"})
4370
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/path/to/file.ts"})
4371
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/path/to/file.ts"})
4372
+
4373
+ dup, tool = monitor.has_duplicate_read_target(threshold=3)
4374
+ self.assertTrue(dup)
4375
+ self.assertEqual(tool, "read")
4376
+
4377
+ def test_different_read_targets_no_duplicate(self):
4378
+ """Option 3: reading different files does NOT trigger duplicate detection."""
4379
+ monitor = proxy.SessionMonitor(context_window=262144)
4380
+
4381
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/path/a.ts"})
4382
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/path/b.ts"})
4383
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/path/c.ts"})
4384
+
4385
+ dup, _ = monitor.has_duplicate_read_target(threshold=3)
4386
+ self.assertFalse(dup)
4387
+
4388
+ def test_cycle_window_default_is_3(self):
4389
+ """Option 2: verify default cycle window is now 3."""
4390
+ # This tests the constant directly
4391
+ self.assertEqual(
4392
+ int(proxy.os.environ.get("PROXY_TOOL_STATE_CYCLE_WINDOW", "3")), 3
4393
+ )
4394
+
4395
+ def test_target_history_reset_on_state_reset(self):
4396
+ """Target history is cleared when tool state resets."""
4397
+ monitor = proxy.SessionMonitor(context_window=262144)
4398
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/file.ts"})
4399
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/file.ts"})
4400
+ monitor.record_tool_calls(["read"], tool_targets={"read": "/file.ts"})
4401
+
4402
+ dup, _ = monitor.has_duplicate_read_target(threshold=3)
4403
+ self.assertTrue(dup)
4404
+
4405
+ monitor.reset_tool_turn_state(reason="test_reset")
4406
+
4407
+ dup, _ = monitor.has_duplicate_read_target(threshold=3)
4408
+ self.assertFalse(dup)