@miller-tech/uap 1.20.47 → 1.20.48
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -224,6 +224,16 @@ PROXY_FINALIZE_CONTINUATION_MAX = int(
|
|
|
224
224
|
PROXY_FINALIZE_SESSION_HARD_CAP = int(
|
|
225
225
|
os.environ.get("PROXY_FINALIZE_SESSION_HARD_CAP", "3")
|
|
226
226
|
)
|
|
227
|
+
# Recon-convergence guardrail: after this many consecutive turns of PURE
|
|
228
|
+
# read-only exploration (Read/Grep/Glob/etc. — no write/edit/deliverable
|
|
229
|
+
# tool), the proxy injects a directive telling the model to stop exploring
|
|
230
|
+
# and produce its deliverable. Targets the failure mode where an agentic
|
|
231
|
+
# recon task reads files for hundreds of turns and never converges to the
|
|
232
|
+
# synthesis/write step (observed: 664-turn recon, no deliverable started).
|
|
233
|
+
# 0 disables.
|
|
234
|
+
PROXY_RECON_CONVERGENCE_THRESHOLD = int(
|
|
235
|
+
os.environ.get("PROXY_RECON_CONVERGENCE_THRESHOLD", "40")
|
|
236
|
+
)
|
|
227
237
|
PROXY_STREAM_REASONING_FALLBACK = (
|
|
228
238
|
os.environ.get("PROXY_STREAM_REASONING_FALLBACK", "off").strip().lower()
|
|
229
239
|
)
|
|
@@ -716,6 +726,7 @@ class SessionMonitor:
|
|
|
716
726
|
)
|
|
717
727
|
loop_warnings_emitted: int = 0 # How many loop warnings sent to the model
|
|
718
728
|
no_progress_streak: int = 0 # Forced tool turns without new tool_result
|
|
729
|
+
consecutive_readonly_turns: int = 0 # turns of pure read-only exploration (B1)
|
|
719
730
|
unexpected_end_turn_count: int = 0 # end_turn without tool_use in active loop
|
|
720
731
|
tool_starvation_streak: int = 0 # Consecutive forced turns with no tool_calls produced
|
|
721
732
|
malformed_tool_streak: int = 0 # consecutive malformed pseudo tool payloads
|
|
@@ -873,6 +884,16 @@ class SessionMonitor:
|
|
|
873
884
|
if len(self.tool_call_history) > 30:
|
|
874
885
|
self.tool_call_history = self.tool_call_history[-30:]
|
|
875
886
|
|
|
887
|
+
# Recon-convergence (B1): count consecutive turns of PURE read-only
|
|
888
|
+
# exploration. A turn that uses any non-read-only tool (write, edit,
|
|
889
|
+
# a deliverable tool) resets the streak — that's the model
|
|
890
|
+
# converging from exploration toward synthesis/action.
|
|
891
|
+
_ro = {n.lower() for n in _READ_ONLY_TOOL_CLASS}
|
|
892
|
+
if tool_names and all(n.lower() in _ro for n in tool_names):
|
|
893
|
+
self.consecutive_readonly_turns += 1
|
|
894
|
+
else:
|
|
895
|
+
self.consecutive_readonly_turns = 0
|
|
896
|
+
|
|
876
897
|
# Track read-only tool targets for dedup (Option 3)
|
|
877
898
|
if tool_targets:
|
|
878
899
|
for name, target in tool_targets.items():
|
|
@@ -3218,6 +3239,51 @@ def _resolve_state_machine_tool_choice(
|
|
|
3218
3239
|
return None, "unknown_phase"
|
|
3219
3240
|
|
|
3220
3241
|
|
|
3242
|
+
def _maybe_inject_recon_convergence(openai_body: dict, monitor: "SessionMonitor") -> None:
    """Append a "stop exploring" directive when read-only recon runs too long.

    Does nothing when PROXY_RECON_CONVERGENCE_THRESHOLD is zero or negative,
    or while ``monitor.consecutive_readonly_turns`` is still below it.
    Otherwise exactly one extra ``user`` message is appended to
    ``openai_body["messages"]`` (the list is created if the key is absent)
    and a warning is logged. The wording escalates with the streak:

    * "firm" -- threshold <= streak < 2 * threshold: switch to synthesis,
      read at most one more file.
    * "hard" -- streak >= 2 * threshold: STOP, write the deliverable now.

    Args:
        openai_body: Outgoing request body; mutated in place.
        monitor: Session state supplying the read-only streak and the
            context utilization quoted in the directive.
    """
    threshold = PROXY_RECON_CONVERGENCE_THRESHOLD
    if threshold <= 0:
        # A non-positive threshold switches the guardrail off entirely.
        return

    readonly_streak = monitor.consecutive_readonly_turns
    if readonly_streak < threshold:
        return

    # Utilization is only queried once we know a directive will be sent.
    ctx_pct = monitor.get_utilization() * 100

    if readonly_streak < 2 * threshold:
        tier = "firm"
        directive = (
            f"You have read files for {readonly_streak} consecutive turns without "
            f"producing a deliverable (context {ctx_pct:.0f}%). You have "
            "enough to begin. Switch from exploration to synthesis: write "
            "your deliverable now. Read at most one more file, and only if "
            "strictly required to write it."
        )
    else:
        tier = "hard"
        directive = (
            f"STOP exploring. You have run {readonly_streak} consecutive turns of "
            f"read-only exploration and context is at {ctx_pct:.0f}%. "
            "You will NOT finish if you keep reading files. Produce your "
            "deliverable NOW from the information you already have — write "
            "it to a file with the appropriate tool. Do not read anything else."
        )

    # Append to the existing list in place, creating it when the key is missing.
    openai_body.setdefault("messages", []).append(
        {"role": "user", "content": directive}
    )
    logger.warning(
        "RECON CONVERGENCE: injected %s directive (readonly_streak=%d, ctx=%.0f%%)",
        tier,
        readonly_streak,
        ctx_pct,
    )
|
|
3285
|
+
|
|
3286
|
+
|
|
3221
3287
|
def build_openai_request(
|
|
3222
3288
|
anthropic_body: dict,
|
|
3223
3289
|
monitor: SessionMonitor,
|
|
@@ -3725,6 +3791,11 @@ def build_openai_request(
|
|
|
3725
3791
|
|
|
3726
3792
|
_apply_tool_call_grammar(openai_body, grammar_override=profile_grammar)
|
|
3727
3793
|
|
|
3794
|
+
# Recon-convergence guardrail (B1) — runs on every built request so a
|
|
3795
|
+
# session wandering in read-only exploration is nudged toward its
|
|
3796
|
+
# deliverable regardless of tool-turn phase.
|
|
3797
|
+
_maybe_inject_recon_convergence(openai_body, monitor)
|
|
3798
|
+
|
|
3728
3799
|
return openai_body
|
|
3729
3800
|
|
|
3730
3801
|
|
|
@@ -5371,3 +5371,84 @@ class TestSlotSaveRestore(unittest.TestCase):
|
|
|
5371
5371
|
self.assertIn("fp:owner", proxy._slot_lru)
|
|
5372
5372
|
self.assertIn("fp:new1", proxy._slot_lru)
|
|
5373
5373
|
self.assertIn("fp:new2", proxy._slot_lru)
|
|
5374
|
+
|
|
5375
|
+
|
|
5376
|
+
class TestReconConvergence(unittest.TestCase):
    """Tests for the B1 recon-convergence guardrail.

    The guardrail nudges a session stuck doing read-only exploration toward
    producing its deliverable. It targets the observed failure: a 664-turn
    agentic recon task that read files for hours and never converged to the
    synthesis/write step.

    Two pieces are covered: the streak bookkeeping done by
    ``SessionMonitor.record_tool_calls`` and the directive injection done by
    ``proxy._maybe_inject_recon_convergence``, including its exact tier
    boundaries (threshold and 2 * threshold).
    """

    def setUp(self):
        # The tests overwrite the module-level threshold; remember the
        # configured value so tearDown can put it back.
        self._threshold = proxy.PROXY_RECON_CONVERGENCE_THRESHOLD

    def tearDown(self):
        proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = self._threshold

    def _inject_with_streak(self, streak, input_tokens=120000):
        """Run the guardrail on a one-message body and return that body."""
        m = proxy.SessionMonitor(context_window=131072)
        m.consecutive_readonly_turns = streak
        m.last_input_tokens = input_tokens
        body = {"messages": [{"role": "user", "content": "go"}]}
        proxy._maybe_inject_recon_convergence(body, m)
        return body

    def test_readonly_turns_increment_the_streak(self):
        """Consecutive turns using only read-only tools grow the streak."""
        m = proxy.SessionMonitor(context_window=131072)
        for _ in range(5):
            m.record_tool_calls(["Read"])
        self.assertEqual(m.consecutive_readonly_turns, 5)
        m.record_tool_calls(["Grep", "Glob"])
        self.assertEqual(m.consecutive_readonly_turns, 6)

    def test_non_readonly_tool_resets_the_streak(self):
        """A turn using a write/edit tool means the model converged toward
        action — the streak resets to 0."""
        m = proxy.SessionMonitor(context_window=131072)
        for _ in range(10):
            m.record_tool_calls(["Read"])
        self.assertEqual(m.consecutive_readonly_turns, 10)
        m.record_tool_calls(["Write"])
        self.assertEqual(m.consecutive_readonly_turns, 0)

    def test_mixed_turn_with_one_write_resets(self):
        """A turn mixing read-only and a write tool still counts as
        converging — any non-read-only tool resets."""
        m = proxy.SessionMonitor(context_window=131072)
        for _ in range(10):
            m.record_tool_calls(["Read"])
        m.record_tool_calls(["Read", "Edit"])
        self.assertEqual(m.consecutive_readonly_turns, 0)

    def test_no_injection_below_threshold(self):
        """One turn short of the threshold leaves the body untouched."""
        proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
        m = proxy.SessionMonitor(context_window=131072)
        m.consecutive_readonly_turns = 39
        body = {"messages": [{"role": "user", "content": "go"}]}
        proxy._maybe_inject_recon_convergence(body, m)
        self.assertEqual(len(body["messages"]), 1)

    def test_firm_directive_exactly_at_threshold(self):
        """The firm tier starts at streak == threshold (inclusive)."""
        proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
        body = self._inject_with_streak(40)
        self.assertEqual(len(body["messages"]), 2)
        self.assertEqual(body["messages"][-1]["role"], "user")
        injected = body["messages"][-1]["content"]
        self.assertIn("synthesis", injected.lower())
        self.assertNotIn("STOP exploring", injected)

    def test_firm_directive_at_threshold(self):
        """Past the threshold but under 2x, the firm directive is used."""
        proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
        m = proxy.SessionMonitor(context_window=131072)
        m.consecutive_readonly_turns = 45
        m.last_input_tokens = 120000
        body = {"messages": [{"role": "user", "content": "go"}]}
        proxy._maybe_inject_recon_convergence(body, m)
        self.assertEqual(len(body["messages"]), 2)
        injected = body["messages"][-1]["content"]
        self.assertIn("synthesis", injected.lower())
        self.assertNotIn("STOP exploring", injected)

    def test_firm_directive_just_below_2x_threshold(self):
        """One turn short of 2x the threshold is still the firm tier."""
        proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
        body = self._inject_with_streak(79)
        self.assertEqual(len(body["messages"]), 2)
        injected = body["messages"][-1]["content"]
        self.assertIn("synthesis", injected.lower())
        self.assertNotIn("STOP exploring", injected)

    def test_hard_directive_at_2x_threshold(self):
        """Once the streak reaches 2x the threshold, escalate to a hard STOP."""
        proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
        m = proxy.SessionMonitor(context_window=131072)
        m.consecutive_readonly_turns = 80
        m.last_input_tokens = 250000  # over budget — the real-incident shape
        body = {"messages": [{"role": "user", "content": "go"}]}
        proxy._maybe_inject_recon_convergence(body, m)
        # Exactly one message is appended, and it is sent as a user turn.
        self.assertEqual(len(body["messages"]), 2)
        self.assertEqual(body["messages"][-1]["role"], "user")
        injected = body["messages"][-1]["content"]
        self.assertIn("STOP exploring", injected)

    def test_disabled_when_threshold_zero(self):
        """A threshold of 0 turns the guardrail off regardless of streak."""
        proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 0
        m = proxy.SessionMonitor(context_window=131072)
        m.consecutive_readonly_turns = 500
        body = {"messages": [{"role": "user", "content": "go"}]}
        proxy._maybe_inject_recon_convergence(body, m)
        self.assertEqual(len(body["messages"]), 1)

    def test_disabled_when_threshold_negative(self):
        """Negative thresholds are treated the same as 0 (disabled)."""
        proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = -1
        m = proxy.SessionMonitor(context_window=131072)
        m.consecutive_readonly_turns = 500
        body = {"messages": [{"role": "user", "content": "go"}]}
        proxy._maybe_inject_recon_convergence(body, m)
        self.assertEqual(len(body["messages"]), 1)

    def test_messages_list_created_when_missing(self):
        """A body without a "messages" key gets one holding the directive."""
        proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
        m = proxy.SessionMonitor(context_window=131072)
        m.consecutive_readonly_turns = 45
        m.last_input_tokens = 120000
        body = {}
        proxy._maybe_inject_recon_convergence(body, m)
        self.assertEqual(len(body["messages"]), 1)
        self.assertEqual(body["messages"][0]["role"], "user")
|