@miller-tech/uap 1.20.12 → 1.20.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.20.12",
3
+ "version": "1.20.14",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -143,7 +143,7 @@ PROXY_TOOL_STATE_MIN_MESSAGES = int(
143
143
  os.environ.get("PROXY_TOOL_STATE_MIN_MESSAGES", "6")
144
144
  )
145
145
  PROXY_TOOL_STATE_FORCED_BUDGET = int(
146
- os.environ.get("PROXY_TOOL_STATE_FORCED_BUDGET", "24")
146
+ os.environ.get("PROXY_TOOL_STATE_FORCED_BUDGET", "12")
147
147
  )
148
148
  PROXY_TOOL_STATE_AUTO_BUDGET = int(os.environ.get("PROXY_TOOL_STATE_AUTO_BUDGET", "2"))
149
149
  PROXY_TOOL_STATE_STAGNATION_THRESHOLD = int(
@@ -156,7 +156,7 @@ PROXY_TOOL_STATE_FINALIZE_THRESHOLD = int(
156
156
  os.environ.get("PROXY_TOOL_STATE_FINALIZE_THRESHOLD", "18")
157
157
  )
158
158
  PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT = int(
159
- os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "2")
159
+ os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "1")
160
160
  )
161
161
  PROXY_CLIENT_RATE_WINDOW_SECS = int(
162
162
  os.environ.get("PROXY_CLIENT_RATE_WINDOW_SECS", "60")
@@ -219,7 +219,7 @@ PROXY_MALFORMED_TOOL_GUARDRAIL = os.environ.get(
219
219
  "no",
220
220
  }
221
221
  PROXY_MALFORMED_TOOL_RETRY_MAX = int(
222
- os.environ.get("PROXY_MALFORMED_TOOL_RETRY_MAX", "2")
222
+ os.environ.get("PROXY_MALFORMED_TOOL_RETRY_MAX", "3")
223
223
  )
224
224
  PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS = int(
225
225
  os.environ.get("PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS", "2048")
@@ -628,6 +628,7 @@ class SessionMonitor:
628
628
  tool_state_transitions: int = 0
629
629
  tool_state_review_cycles: int = 0
630
630
  last_tool_fingerprint: str = ""
631
+ cycling_tool_names: list = field(default_factory=list)
631
632
  finalize_turn_active: bool = False
632
633
  completion_required: bool = False
633
634
  completion_pending: bool = False
@@ -832,6 +833,7 @@ class SessionMonitor:
832
833
  self.tool_state_auto_budget_remaining = 0
833
834
  self.tool_state_stagnation_streak = 0
834
835
  self.tool_state_review_cycles = 0
836
+ self.cycling_tool_names = []
835
837
  self.last_tool_fingerprint = ""
836
838
 
837
839
  def update_completion_state(self, anthropic_body: dict, has_tool_results: bool):
@@ -2053,12 +2055,17 @@ def _resolve_state_machine_tool_choice(
2053
2055
  monitor.tool_state_forced_budget_remaining = max(
2054
2056
  1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
2055
2057
  )
2058
+ # Capture which tools are cycling for narrowing/hint injection
2059
+ window = max(2, PROXY_TOOL_STATE_CYCLE_WINDOW)
2060
+ recent = [fp for fp in monitor.tool_call_history[-window:] if fp]
2061
+ monitor.cycling_tool_names = list(dict.fromkeys(recent))
2056
2062
  logger.warning(
2057
- "TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d)",
2063
+ "TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d cycling_tools=%s)",
2058
2064
  cycle_looping,
2059
2065
  cycle_repeat,
2060
2066
  monitor.tool_state_stagnation_streak,
2061
2067
  monitor.tool_state_review_cycles,
2068
+ monitor.cycling_tool_names,
2062
2069
  )
2063
2070
  return "required", reason
2064
2071
 
@@ -2349,6 +2356,49 @@ def build_openai_request(
2349
2356
  monitor.no_progress_streak = (
2350
2357
  0 if last_user_has_tool_result else monitor.no_progress_streak + 1
2351
2358
  )
2359
+ # Option 1: Inject cycle-break instruction when entering review
2360
+ if (
2361
+ monitor.tool_turn_phase == "review"
2362
+ and state_reason in {"cycle_detected", "stagnation"}
2363
+ and monitor.cycling_tool_names
2364
+ ):
2365
+ cycling_names = ", ".join(monitor.cycling_tool_names)
2366
+ cycle_hint = (
2367
+ f"You have been repeatedly calling the same tool(s): {cycling_names}. "
2368
+ "This is not making progress. Use a DIFFERENT tool to advance the task, "
2369
+ "or call a tool that produces your final answer."
2370
+ )
2371
+ messages = openai_body.get("messages", [])
2372
+ messages.append({"role": "user", "content": cycle_hint})
2373
+ openai_body["messages"] = messages
2374
+ logger.warning(
2375
+ "CYCLE BREAK: injected hint about cycling tools: %s",
2376
+ cycling_names,
2377
+ )
2378
+ # Option 2: Narrow tools during review to exclude cycling tools
2379
+ if (
2380
+ monitor.tool_turn_phase == "review"
2381
+ and monitor.cycling_tool_names
2382
+ and "tools" in openai_body
2383
+ ):
2384
+ original_count = len(openai_body["tools"])
2385
+ narrowed = [
2386
+ t
2387
+ for t in openai_body["tools"]
2388
+ if t.get("function", {}).get("name") not in monitor.cycling_tool_names
2389
+ ]
2390
+ if narrowed:
2391
+ openai_body["tools"] = narrowed
2392
+ logger.warning(
2393
+ "CYCLE BREAK: narrowed tools from %d to %d (excluded %s)",
2394
+ original_count,
2395
+ len(narrowed),
2396
+ monitor.cycling_tool_names,
2397
+ )
2398
+ else:
2399
+ logger.warning(
2400
+ "CYCLE BREAK: cannot narrow tools — all tools are cycling, keeping original set",
2401
+ )
2352
2402
  logger.info(
2353
2403
  "tool_choice forced to 'required' by TOOL STATE MACHINE (phase=%s reason=%s forced_budget=%d)",
2354
2404
  monitor.tool_turn_phase,
@@ -3840,6 +3890,40 @@ async def _apply_completion_contract_guardrail(
3840
3890
  return retried
3841
3891
 
3842
3892
 
3893
+ def _sanitize_assistant_messages_for_retry(messages: list[dict]) -> list[dict]:
3894
+ """Strip malformed tool-like text from assistant messages to prevent copy-contamination.
3895
+
3896
+ Only sanitizes the last 4 assistant messages to avoid excessive processing.
3897
+ """
3898
+ import re
3899
+
3900
+ # Patterns that indicate malformed tool call text in assistant content
3901
+ _TOOL_LIKE_PATTERNS = re.compile(
3902
+ r"<tool_call>.*?</tool_call>"
3903
+ r"|<function_call>.*?</function_call>"
3904
+ r'|\{"name"\s*:\s*"[^"]+"\s*,\s*"arguments"\s*:'
3905
+ r"|```json\s*\{[^}]*\"name\"\s*:",
3906
+ re.DOTALL,
3907
+ )
3908
+
3909
+ result = list(messages)
3910
+ sanitized_count = 0
3911
+ for i in range(len(result) - 1, -1, -1):
3912
+ if sanitized_count >= 4:
3913
+ break
3914
+ msg = result[i]
3915
+ if msg.get("role") != "assistant":
3916
+ continue
3917
+ content = msg.get("content", "")
3918
+ if isinstance(content, str) and _TOOL_LIKE_PATTERNS.search(content):
3919
+ cleaned = _TOOL_LIKE_PATTERNS.sub("", content).strip()
3920
+ if not cleaned:
3921
+ cleaned = "I will use the appropriate tool."
3922
+ result[i] = {**msg, "content": cleaned}
3923
+ sanitized_count += 1
3924
+ return result
3925
+
3926
+
3843
3927
  def _build_malformed_retry_body(
3844
3928
  openai_body: dict,
3845
3929
  anthropic_body: dict,
@@ -3851,7 +3935,11 @@ def _build_malformed_retry_body(
3851
3935
  retry_body = dict(openai_body)
3852
3936
  retry_body["stream"] = False
3853
3937
  retry_body["tool_choice"] = tool_choice
3854
- retry_body["temperature"] = PROXY_MALFORMED_TOOL_RETRY_TEMPERATURE
3938
+ # Escalate temperature down on successive retries for more deterministic output
3939
+ if total_attempts > 1 and attempt > 1:
3940
+ retry_body["temperature"] = 0.0
3941
+ else:
3942
+ retry_body["temperature"] = PROXY_MALFORMED_TOOL_RETRY_TEMPERATURE
3855
3943
 
3856
3944
  if tool_choice == "required":
3857
3945
  retry_instruction = (
@@ -3872,7 +3960,10 @@ def _build_malformed_retry_body(
3872
3960
  }
3873
3961
  existing_messages = retry_body.get("messages")
3874
3962
  if isinstance(existing_messages, list) and existing_messages:
3875
- retry_body["messages"] = [*existing_messages, malformed_retry_instruction]
3963
+ # Strip malformed tool-like text from assistant messages to prevent
3964
+ # the model from copying contaminated patterns on retry
3965
+ sanitized = _sanitize_assistant_messages_for_retry(existing_messages)
3966
+ retry_body["messages"] = [*sanitized, malformed_retry_instruction]
3876
3967
 
3877
3968
  if PROXY_MALFORMED_TOOL_RETRY_MAX_TOKENS > 0:
3878
3969
  current_max = int(
@@ -3236,6 +3236,207 @@ class TestPruningImprovements(unittest.TestCase):
3236
3236
  self.assertEqual(monitor.tool_turn_phase, "finalize")
3237
3237
 
3238
3238
 
3239
class TestCycleBreakOptions(unittest.TestCase):
    """Cycle-break coverage: hint injection, tool narrowing, and the lowered default budgets."""

    # Values written onto the proxy module for the duration of a cycle scenario.
    _SCENARIO_SETTINGS = {
        "PROXY_TOOL_STATE_MACHINE": True,
        "PROXY_TOOL_STATE_MIN_MESSAGES": 3,
        "PROXY_TOOL_STATE_FORCED_BUDGET": 20,
        "PROXY_TOOL_STATE_AUTO_BUDGET": 2,
        "PROXY_TOOL_STATE_STAGNATION_THRESHOLD": 99,
        "PROXY_TOOL_STATE_CYCLE_WINDOW": 4,
    }

    def _snapshot_settings(self):
        """Return the current proxy values of every setting the scenarios override."""
        return {key: getattr(proxy, key) for key in self._SCENARIO_SETTINGS}

    @staticmethod
    def _write_settings(values):
        """Assign each name/value pair onto the proxy module."""
        for key, value in values.items():
            setattr(proxy, key, value)

    @staticmethod
    def _bash_looping_monitor():
        """Build a monitor in the act phase whose recent history is four Bash calls."""
        monitor = proxy.SessionMonitor(context_window=262144)
        monitor.tool_turn_phase = "act"
        monitor.tool_state_forced_budget_remaining = 20
        monitor.tool_call_history = ["Bash"] * 4
        monitor.last_tool_fingerprint = "Bash"
        return monitor

    @staticmethod
    def _bash_round_trip_body(tool_specs):
        """Build a request body with one Bash tool_use/tool_result exchange.

        ``tool_specs`` is a sequence of ``(name, description)`` pairs that
        becomes the ``tools`` array.
        """
        tool_use = {"type": "tool_use", "id": "t1", "name": "Bash", "input": {"command": "ls"}}
        tool_result = {"type": "tool_result", "tool_use_id": "t1", "content": "ok"}
        return {
            "model": "test",
            "messages": [
                {"role": "user", "content": "start"},
                {"role": "assistant", "content": [tool_use]},
                {"role": "user", "content": [tool_result]},
            ],
            "tools": [
                {"name": name, "description": description, "input_schema": {"type": "object"}}
                for name, description in tool_specs
            ],
        }

    def test_cycle_break_injects_hint_message(self):
        """Option 1: entering review on a detected cycle appends a user hint naming the tool."""
        previous = self._snapshot_settings()
        try:
            self._write_settings(self._SCENARIO_SETTINGS)
            monitor = self._bash_looping_monitor()
            body = self._bash_round_trip_body(
                [("Bash", "Run command"), ("Read", "Read file")]
            )

            request = proxy.build_openai_request(body, monitor)

            self.assertEqual(monitor.tool_turn_phase, "review")
            # The injected hint must be the final message of the request.
            messages = request.get("messages", [])
            final = messages[-1] if messages else {}
            self.assertEqual(final.get("role"), "user")
            hint_text = final.get("content", "")
            self.assertIn("Bash", hint_text)
            self.assertIn("DIFFERENT tool", hint_text)
        finally:
            self._write_settings(previous)

    def test_cycle_break_narrows_tools(self):
        """Option 2: the cycling tool is dropped from the tools array during review."""
        previous = self._snapshot_settings()
        try:
            self._write_settings(self._SCENARIO_SETTINGS)
            monitor = self._bash_looping_monitor()
            body = self._bash_round_trip_body(
                [("Bash", "Run command"), ("Read", "Read file"), ("Write", "Write file")]
            )

            request = proxy.build_openai_request(body, monitor)

            self.assertEqual(monitor.tool_turn_phase, "review")
            # Bash is excluded; Read and Write survive the narrowing.
            remaining = [tool["function"]["name"] for tool in request.get("tools", [])]
            self.assertNotIn("Bash", remaining)
            self.assertIn("Read", remaining)
            self.assertIn("Write", remaining)
        finally:
            self._write_settings(previous)

    def test_forced_budget_default_is_12(self):
        """Option 3: the forced-budget default is 12 (previously 24)."""
        self.assertEqual(proxy.PROXY_TOOL_STATE_FORCED_BUDGET, 12)

    def test_review_cycle_limit_default_is_1(self):
        """Option 4: the review-cycle-limit default is 1 (previously 2)."""
        self.assertEqual(proxy.PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT, 1)

    def test_cycling_tool_names_cleared_on_reset(self):
        """Resetting the tool turn state empties cycling_tool_names."""
        monitor = proxy.SessionMonitor(context_window=262144)
        monitor.cycling_tool_names = ["Bash", "Read"]
        monitor.reset_tool_turn_state(reason="test")
        self.assertEqual(monitor.cycling_tool_names, [])
3378
+
3379
+
3380
class TestMalformedRetryHardening(unittest.TestCase):
    """Malformed-retry hardening coverage: retry budget default and message sanitization."""

    def test_retry_max_default_is_3(self):
        """Option 1: the retry budget default is 3 (previously 2)."""
        self.assertEqual(proxy.PROXY_MALFORMED_TOOL_RETRY_MAX, 3)

    def test_sanitize_assistant_messages_strips_tool_like_text(self):
        """Option 3: tool-call markup is removed from assistant text, other roles are untouched."""
        contaminated = 'Here is the result <tool_call>{"name": "Bash", "arguments": {"command": "ls"}}</tool_call>'
        conversation = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Run a command"},
            {"role": "assistant", "content": contaminated},
            {"role": "user", "content": "ok"},
        ]

        system_msg, first_user, assistant_msg, last_user = (
            proxy._sanitize_assistant_messages_for_retry(conversation)
        )

        # Non-assistant messages come back with their content intact.
        self.assertEqual(system_msg["content"], "You are helpful.")
        self.assertEqual(first_user["content"], "Run a command")
        self.assertEqual(last_user["content"], "ok")
        # The assistant message no longer carries the tool-call markup or its payload.
        self.assertNotIn("<tool_call>", assistant_msg["content"])
        self.assertNotIn("Bash", assistant_msg["content"])

    def test_sanitize_preserves_clean_assistant_messages(self):
        """Assistant text without tool-like patterns passes through unchanged."""
        clean_text = "I will read the file for you."
        output = proxy._sanitize_assistant_messages_for_retry(
            [{"role": "assistant", "content": clean_text}]
        )
        self.assertEqual(output[0]["content"], "I will read the file for you.")

    def test_sanitize_replaces_empty_content_with_placeholder(self):
        """Content that is entirely tool-call markup is replaced by the placeholder sentence."""
        only_markup = '<tool_call>{"name": "Bash", "arguments": {}}</tool_call>'
        output = proxy._sanitize_assistant_messages_for_retry(
            [{"role": "assistant", "content": only_markup}]
        )
        self.assertEqual(output[0]["content"], "I will use the appropriate tool.")

    def test_retry_body_uses_sanitized_messages(self):
        """The retry body carries sanitized assistant messages ahead of the retry instruction."""
        request_body = {
            "messages": [
                {"role": "system", "content": "sys"},
                {"role": "user", "content": "do it"},
                {"role": "assistant", "content": '<tool_call>{"name":"X","arguments":{}}</tool_call>'},
            ],
            "tools": [{"type": "function", "function": {"name": "X", "parameters": {}}}],
        }
        source_body = {"tools": [{"name": "X", "input_schema": {"type": "object"}}]}

        retry = proxy._build_malformed_retry_body(
            request_body,
            source_body,
            attempt=1,
            total_attempts=3,
        )

        # No assistant message in the retry payload may still contain the markup.
        for message in retry["messages"]:
            if message.get("role") == "assistant":
                self.assertNotIn("<tool_call>", message.get("content", ""))
3438
+
3439
+
3239
3440
  if __name__ == "__main__":
3240
3441
  unittest.main()
3241
3442