@miller-tech/uap 1.20.27 → 1.20.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.20.27",
3
+ "version": "1.20.32",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -656,6 +656,8 @@ class SessionMonitor:
656
656
  tool_state_review_cycles: int = 0
657
657
  last_tool_fingerprint: str = ""
658
658
  cycling_tool_names: list = field(default_factory=list)
659
+ session_banned_tools: set = field(default_factory=set) # tools banned for entire session after repeated cycling
660
+ tool_cycle_counts: dict = field(default_factory=dict) # {tool_name: cycle_count} across resets
659
661
  last_response_garbled: bool = False # previous turn had garbled/malformed output
660
662
  finalize_turn_active: bool = False
661
663
  finalize_continuation_count: int = 0
@@ -1346,6 +1348,43 @@ def prune_conversation(
1346
1348
  http_client: httpx.AsyncClient | None = None
1347
1349
 
1348
1350
 
1351
def _is_loading_model_503(resp: httpx.Response) -> bool:
    """Tell whether *resp* is an upstream 503 whose body mentions "loading model".

    The match on the body is case-insensitive. Any error raised while reading
    or lower-casing the body is treated as "not a loading-model response".
    """
    is_503 = resp.status_code == 503
    if not is_503:
        return False
    try:
        body_lower = resp.text.lower()
        return "loading model" in body_lower
    except Exception:
        # Best-effort classification: an unreadable body simply does not match.
        return False
1359
+
1360
+
1361
async def _wait_for_upstream_health(
    client: httpx.AsyncClient,
    max_wait: float = 60.0,
    poll_interval: float = 5.0,
) -> bool:
    """Poll the upstream ``/health`` endpoint until it answers 200 or time runs out.

    Args:
        client: HTTP client used to send the health probes.
        max_wait: Wall-clock budget in seconds. Time spent inside each probe
            counts against it. A non-positive value skips polling entirely.
        poll_interval: Pause in seconds between consecutive probes.

    Returns:
        True as soon as a probe returns HTTP 200; False once the budget is
        exhausted without a 200.
    """
    # Swap only a trailing "/v1" segment for "/health". A blanket
    # str.replace would also rewrite "/v1" appearing elsewhere in the URL and
    # would add no "/health" at all when the base lacks "/v1".
    # NOTE(review): assumes LLAMA_CPP_BASE is the API base, normally ending in
    # "/v1" (optionally with a trailing slash) — confirm against its definition.
    health_url = LLAMA_CPP_BASE.rstrip("/").removesuffix("/v1") + "/health"

    # Measure against the event-loop clock so that slow or timed-out probes
    # consume the budget too, instead of only counting the sleeps.
    loop = asyncio.get_running_loop()
    started = loop.time()
    deadline = started + max_wait
    probes_done = 0

    while loop.time() < deadline:
        try:
            resp = await client.get(health_url, timeout=5.0)
        except Exception as exc:
            # Deliberately best-effort: a failed probe just means "not ready
            # yet", but leave a trace for debugging instead of dropping it.
            logger.debug("UPSTREAM HEALTH: probe failed: %s", exc)
        else:
            # A 200 is the readiness signal; the body is not inspected because
            # the status code alone already decided the outcome.
            if resp.status_code == 200:
                if probes_done:
                    logger.info(
                        "UPSTREAM HEALTH: recovered after %.0fs wait",
                        loop.time() - started,
                    )
                return True
        probes_done += 1

        remaining = deadline - loop.time()
        if remaining <= 0:
            break
        # Never sleep past the deadline, and never pass a negative delay.
        await asyncio.sleep(min(max(poll_interval, 0.0), remaining))

    logger.error("UPSTREAM HEALTH: not ready after %.0fs", max_wait)
    return False
1386
+
1387
+
1349
1388
  async def _post_with_retry(
1350
1389
  client: httpx.AsyncClient,
1351
1390
  url: str,
@@ -1355,7 +1394,19 @@ async def _post_with_retry(
1355
1394
  last_exc: Exception | None = None
1356
1395
  for attempt in range(PROXY_UPSTREAM_RETRY_MAX):
1357
1396
  try:
1358
- return await client.post(url, json=payload, headers=headers)
1397
+ resp = await client.post(url, json=payload, headers=headers)
1398
+ # Cycle 19 Option 1: if 503 "Loading model", wait for health then retry
1399
+ if _is_loading_model_503(resp):
1400
+ logger.warning(
1401
+ "Upstream 503 Loading model (attempt %d/%d) – waiting for health",
1402
+ attempt + 1,
1403
+ PROXY_UPSTREAM_RETRY_MAX,
1404
+ )
1405
+ healthy = await _wait_for_upstream_health(client, max_wait=60.0)
1406
+ if healthy and attempt < PROXY_UPSTREAM_RETRY_MAX - 1:
1407
+ continue # retry the request now that upstream is healthy
1408
+ return resp # return the 503 if health wait timed out
1409
+ return resp
1359
1410
  except (httpx.ConnectError, httpx.RemoteProtocolError, httpx.ReadTimeout) as exc:
1360
1411
  last_exc = exc
1361
1412
  if attempt < PROXY_UPSTREAM_RETRY_MAX - 1:
@@ -2240,6 +2291,16 @@ def _resolve_state_machine_tool_choice(
2240
2291
  for part in fp.split("|"):
2241
2292
  raw_names.append(part.split(":")[0])
2242
2293
  monitor.cycling_tool_names = list(dict.fromkeys(raw_names))
2294
+ # Cycle 18 Option 2: track per-tool cycle counts and ban after 3 cycles
2295
+ for name in monitor.cycling_tool_names:
2296
+ monitor.tool_cycle_counts[name] = monitor.tool_cycle_counts.get(name, 0) + 1
2297
+ if monitor.tool_cycle_counts[name] >= 3 and name not in monitor.session_banned_tools:
2298
+ monitor.session_banned_tools.add(name)
2299
+ logger.warning(
2300
+ "TOOL BAN: '%s' banned for session after %d cycle detections",
2301
+ name,
2302
+ monitor.tool_cycle_counts[name],
2303
+ )
2243
2304
  logger.warning(
2244
2305
  "TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d cycling_tools=%s)",
2245
2306
  cycle_looping,
@@ -2629,14 +2690,15 @@ def build_openai_request(
2629
2690
  cycling_names,
2630
2691
  cycles,
2631
2692
  )
2632
- # Narrow tools to exclude cycling tools
2693
+ # Narrow tools to exclude cycling tools + session-banned tools
2633
2694
  # Option 1 (Cycle 13): if any cycling tool is read-only, exclude entire class
2634
2695
  # Option 1 (Cycle 14): persist exclusion during act phase too, not just review
2696
+ # Option 2 (Cycle 18): always exclude session-banned tools
2635
2697
  if (
2636
- monitor.cycling_tool_names
2698
+ (monitor.cycling_tool_names or monitor.session_banned_tools)
2637
2699
  and "tools" in openai_body
2638
2700
  ):
2639
- exclude_set = set(monitor.cycling_tool_names)
2701
+ exclude_set = set(monitor.cycling_tool_names) | monitor.session_banned_tools
2640
2702
  # Expand to full read-only class if any cycling tool is read-only
2641
2703
  if any(n.lower() in {c.lower() for c in _READ_ONLY_TOOL_CLASS} for n in exclude_set):
2642
2704
  exclude_set |= _READ_ONLY_TOOL_CLASS
@@ -2648,13 +2710,15 @@ def build_openai_request(
2648
2710
  ]
2649
2711
  if narrowed:
2650
2712
  openai_body["tools"] = narrowed
2651
- logger.warning(
2652
- "CYCLE BREAK: narrowed tools from %d to %d (excluded %s, read_only_class=%s)",
2653
- original_count,
2654
- len(narrowed),
2655
- monitor.cycling_tool_names,
2656
- any(n.lower() in {c.lower() for c in _READ_ONLY_TOOL_CLASS} for n in monitor.cycling_tool_names),
2657
- )
2713
+ # Only log on first activation or phase transitions to reduce noise
2714
+ if state_reason in {"cycle_detected", "stagnation"}:
2715
+ logger.warning(
2716
+ "CYCLE BREAK: narrowed tools from %d to %d (excluded %s, read_only_class=%s)",
2717
+ original_count,
2718
+ len(narrowed),
2719
+ monitor.cycling_tool_names,
2720
+ any(n.lower() in {c.lower() for c in _READ_ONLY_TOOL_CLASS} for n in monitor.cycling_tool_names),
2721
+ )
2658
2722
  else:
2659
2723
  logger.warning(
2660
2724
  "CYCLE BREAK: cannot narrow tools — all tools are cycling, keeping original set",
@@ -3092,6 +3156,47 @@ _TOOL_CALL_XML_RE = re.compile(
3092
3156
  )
3093
3157
 
3094
3158
 
3159
+ def _repair_tool_call_json(raw: str) -> str | None:
3160
+ """Attempt to repair common garbled JSON in tool call payloads.
3161
+
3162
+ Returns repaired JSON string, or None if repair is not possible.
3163
+ Handles: trailing braces, unbalanced brackets, truncated strings.
3164
+ """
3165
+ s = raw.strip()
3166
+ if not s.startswith("{"):
3167
+ return None
3168
+ # Strip trailing garbage (runaway braces/brackets)
3169
+ while s.endswith("}}") and s.count("{") < s.count("}"):
3170
+ s = s[:-1]
3171
+ while s.endswith("]]") and s.count("[") < s.count("]"):
3172
+ s = s[:-1]
3173
+ # Balance braces
3174
+ open_b = s.count("{") - s.count("}")
3175
+ if open_b > 0:
3176
+ s += "}" * open_b
3177
+ elif open_b < 0:
3178
+ # Too many closing braces — trim from end
3179
+ for _ in range(-open_b):
3180
+ idx = s.rfind("}")
3181
+ if idx > 0:
3182
+ s = s[:idx] + s[idx + 1:]
3183
+ # Try to parse
3184
+ try:
3185
+ json.loads(s)
3186
+ return s
3187
+ except json.JSONDecodeError:
3188
+ pass
3189
+ # Try truncating at last valid comma + closing
3190
+ for end in range(len(s) - 1, max(0, len(s) - 200), -1):
3191
+ candidate = s[:end].rstrip().rstrip(",") + "}" * max(0, s[:end].count("{") - s[:end].count("}"))
3192
+ try:
3193
+ json.loads(candidate)
3194
+ return candidate
3195
+ except json.JSONDecodeError:
3196
+ continue
3197
+ return None
3198
+
3199
+
3095
3200
  def _extract_tool_calls_from_text(text: str) -> tuple[list[dict], str]:
3096
3201
  """Parse ``<tool_call>{...}</tool_call>`` blocks out of *text*.
3097
3202
 
@@ -3112,7 +3217,18 @@ def _extract_tool_calls_from_text(text: str) -> tuple[list[dict], str]:
3112
3217
  try:
3113
3218
  payload = json.loads(raw_json)
3114
3219
  except json.JSONDecodeError:
3115
- continue
3220
+ # Cycle 15 Option 1: attempt JSON repair before giving up
3221
+ repaired = _repair_tool_call_json(raw_json)
3222
+ if repaired:
3223
+ try:
3224
+ payload = json.loads(repaired)
3225
+ logger.info(
3226
+ "TOOL CALL EXTRACTION: repaired garbled JSON in <tool_call> block"
3227
+ )
3228
+ except json.JSONDecodeError:
3229
+ continue
3230
+ else:
3231
+ continue
3116
3232
  if not isinstance(payload, dict):
3117
3233
  continue
3118
3234
 
@@ -4380,9 +4496,11 @@ def _build_malformed_retry_body(
4380
4496
  retry_body = dict(openai_body)
4381
4497
  retry_body["stream"] = False
4382
4498
  retry_body["tool_choice"] = tool_choice
4383
- # Escalate temperature down on successive retries for more deterministic output
4499
+ # Cycle 15 Option 3: vary temperature across retries to break degenerate patterns.
4500
+ # Attempt 1: use configured retry temp (default 0.0) for deterministic first try.
4501
+ # Attempt 2+: increase to 0.5 to escape the degenerate local minimum.
4384
4502
  if total_attempts > 1 and attempt > 1:
4385
- retry_body["temperature"] = 0.0
4503
+ retry_body["temperature"] = 0.5
4386
4504
  else:
4387
4505
  retry_body["temperature"] = PROXY_MALFORMED_TOOL_RETRY_TEMPERATURE
4388
4506
 
@@ -5922,6 +6040,27 @@ async def messages(request: Request):
5922
6040
 
5923
6041
  if strict_resp.status_code != 200:
5924
6042
  error_text = strict_resp.text[:1000]
6043
+ # Cycle 19 Option 2: For 503 "Loading model", don't advance state
6044
+ # machine — return retriable 503 with Retry-After header so the
6045
+ # client can retry without wasting state machine budget.
6046
+ if _is_loading_model_503(strict_resp):
6047
+ logger.warning(
6048
+ "Upstream 503 Loading model (strict-stream) — returning retriable 503 without advancing state",
6049
+ )
6050
+ return Response(
6051
+ content=json.dumps(
6052
+ {
6053
+ "type": "error",
6054
+ "error": {
6055
+ "type": "overloaded_error",
6056
+ "message": "Upstream model is loading. Retry in 10 seconds.",
6057
+ },
6058
+ }
6059
+ ),
6060
+ status_code=503,
6061
+ headers={"Retry-After": "10"},
6062
+ media_type="application/json",
6063
+ )
5925
6064
  logger.error(
5926
6065
  "Upstream HTTP %d (strict-stream): %s",
5927
6066
  strict_resp.status_code,
@@ -4662,3 +4662,159 @@ class TestMalformedPayloadLoopFix(unittest.TestCase):
4662
4662
  }
4663
4663
  openai = proxy.build_openai_request(body, monitor)
4664
4664
  self.assertAlmostEqual(openai.get("temperature", 1.0), 0.3, places=1)
4665
+
4666
+
4667
class TestToolCallJsonRepair(unittest.TestCase):
    """Cycle 15 Option 1: repairing garbled JSON found in tool call payloads."""

    def test_repairs_trailing_braces(self):
        """Surplus closing braces at the end are dropped so the payload parses."""
        fixed = proxy._repair_tool_call_json(
            '{"name":"bash","arguments":{"command":"ls"}}}}'
        )
        self.assertIsNotNone(fixed)
        self.assertEqual(json.loads(fixed)["name"], "bash")

    def test_repairs_unbalanced_open_braces(self):
        """A payload missing its final closing brace is completed."""
        fixed = proxy._repair_tool_call_json(
            '{"name":"read","arguments":{"file_path":"/foo"}'
        )
        self.assertIsNotNone(fixed)
        self.assertEqual(json.loads(fixed)["name"], "read")

    def test_returns_none_for_total_garbage(self):
        """Text that is not an object at all cannot be repaired."""
        self.assertIsNone(proxy._repair_tool_call_json("not json at all"))

    def test_extracts_repaired_tool_call_from_text(self):
        """End-to-end: a garbled <tool_call> block still yields one tool call."""
        source_text = (
            '<tool_call>\n{"name":"bash","arguments":{"command":"pwd"}}}\n</tool_call>'
        )
        calls, _leftover = proxy._extract_tool_calls_from_text(source_text)
        self.assertEqual(len(calls), 1)
        first_call = calls[0]
        self.assertEqual(first_call["function"]["name"], "bash")
4697
+
4698
+
4699
class TestRetryTemperatureVariance(unittest.TestCase):
    """Cycle 15 Option 3: the retry temperature changes between attempts."""

    @staticmethod
    def _request_stub():
        """Build a fresh minimal request body (a new dict on every call)."""
        return {"messages": [{"role": "user", "content": "test"}], "tools": []}

    def _retry_body_for(self, attempt):
        """Build the malformed-retry body for *attempt* out of three attempts."""
        return proxy._build_malformed_retry_body(
            self._request_stub(),
            self._request_stub(),
            retry_hint="fix it",
            tool_choice="required",
            attempt=attempt,
            total_attempts=3,
            is_garbled=False,
        )

    def test_retry_attempt_1_uses_configured_temp(self):
        """Attempt 1 keeps PROXY_MALFORMED_TOOL_RETRY_TEMPERATURE."""
        retry_body = self._retry_body_for(1)
        expected = proxy.PROXY_MALFORMED_TOOL_RETRY_TEMPERATURE
        self.assertEqual(retry_body["temperature"], expected)

    def test_retry_attempt_2_uses_higher_temp(self):
        """Attempt 2 switches to temperature 0.5."""
        retry_body = self._retry_body_for(2)
        self.assertEqual(retry_body["temperature"], 0.5)
4727
+
4728
+
4729
class TestCycle18SessionBanAndLogNoise(unittest.TestCase):
    """Cycle 18: session-wide tool bans and reduced cycle-break logging."""

    def test_tool_banned_after_3_cycle_detections(self):
        """Option 2: the third cycle detection for a tool bans it for the session."""
        monitor = proxy.SessionMonitor(context_window=262144)
        # Two detections for 'task' already recorded; the next one is the third.
        monitor.tool_cycle_counts["task"] = 2
        monitor.cycling_tool_names = ["task"]

        # NOTE(review): this loop restates the counting/banning rule inline
        # instead of driving the production cycle detector — consider calling
        # the real code path so the test cannot drift from it.
        counts = monitor.tool_cycle_counts
        for tool in monitor.cycling_tool_names:
            updated = counts.get(tool, 0) + 1
            counts[tool] = updated
            if updated >= 3:
                monitor.session_banned_tools.add(tool)

        self.assertIn("task", monitor.session_banned_tools)
        self.assertEqual(monitor.tool_cycle_counts["task"], 3)

    def test_session_ban_survives_state_reset(self):
        """Option 2: reset_tool_turn_state leaves bans and cycle counts untouched."""
        monitor = proxy.SessionMonitor(context_window=262144)
        monitor.tool_cycle_counts["task"] = 3
        monitor.session_banned_tools.add("task")

        monitor.reset_tool_turn_state(reason="test")

        # Bans are scoped to the session, so a per-turn reset must not clear them.
        self.assertEqual(monitor.tool_cycle_counts["task"], 3)
        self.assertIn("task", monitor.session_banned_tools)

    def test_banned_tools_excluded_even_without_cycling(self):
        """Option 2: a banned tool is filtered out even with no active cycling."""
        overrides = {
            "PROXY_TOOL_STATE_MACHINE": True,
            "PROXY_TOOL_STATE_MIN_MESSAGES": 3,
            "PROXY_TOOL_STATE_FORCED_BUDGET": 6,
        }
        # Snapshot the module settings first so they can be put back afterwards.
        saved = {key: getattr(proxy, key) for key in overrides}
        try:
            for key, value in overrides.items():
                setattr(proxy, key, value)

            tool_use_turn = {
                "role": "assistant",
                "content": [
                    {"type": "tool_use", "id": "t1", "name": "bash", "input": {"command": "ls"}}
                ],
            }
            tool_result_turn = {
                "role": "user",
                "content": [
                    {"type": "tool_result", "tool_use_id": "t1", "content": "ok"}
                ],
            }
            body = {
                "model": "test",
                "messages": [
                    {"role": "user", "content": "do"},
                    tool_use_turn,
                    tool_result_turn,
                ],
                "tools": [
                    {"name": "task", "description": "Task", "input_schema": {"type": "object"}},
                    {"name": "bash", "description": "Bash", "input_schema": {"type": "object"}},
                    {"name": "read", "description": "Read", "input_schema": {"type": "object"}},
                ],
            }
            monitor = proxy.SessionMonitor(context_window=262144)
            monitor.session_banned_tools.add("task")
            monitor.cycling_tool_names = []  # nothing is cycling right now

            openai = proxy.build_openai_request(body, monitor)
            offered = [tool["function"]["name"] for tool in openai.get("tools", [])]
            self.assertNotIn("task", offered)
            self.assertIn("bash", offered)
        finally:
            for key, value in saved.items():
                setattr(proxy, key, value)
4799
+
4800
+
4801
class TestUpstream503Resilience(unittest.TestCase):
    """Cycle 19: recognising the upstream's 503 "Loading model" response."""

    @staticmethod
    def _classify(status, body):
        """Run the detector on a synthetic response with *status* and *body*."""
        return proxy._is_loading_model_503(httpx.Response(status, text=body))

    def test_is_loading_model_503_detects_loading(self):
        """A 503 whose body says "Loading model" is recognised."""
        verdict = self._classify(
            503,
            '{"error":{"message":"Loading model","type":"unavailable_error","code":503}}',
        )
        self.assertTrue(verdict)

    def test_is_loading_model_503_ignores_other_503(self):
        """A 503 carrying a different message is not a match."""
        verdict = self._classify(503, '{"error":{"message":"Server busy"}}')
        self.assertFalse(verdict)

    def test_is_loading_model_503_ignores_200(self):
        """A 200 is never a match, even when its body mentions loading."""
        verdict = self._classify(200, '{"status":"loading model"}')
        self.assertFalse(verdict)