@miller-tech/uap 1.20.49 → 1.20.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -207,6 +207,19 @@ _READ_ONLY_TOOL_CLASS = frozenset({
|
|
|
207
207
|
"search", "Search", "list_files", "ListFiles",
|
|
208
208
|
})
|
|
209
209
|
|
|
210
|
+
# Tools that produce or mutate a deliverable. Using any of these in a turn
|
|
211
|
+
# means the agent is converging from exploration toward output, and resets
|
|
212
|
+
# the recon-convergence streak (B1). This is deliberately a SHORT allowlist
|
|
213
|
+
# of write tools, NOT a read-only denylist: exploration happens through an
|
|
214
|
+
# open-ended set of tools (Bash, WebFetch, Agent, ...) that cannot be
|
|
215
|
+
# enumerated, but "the agent produced a write" is a small, stable signal.
|
|
216
|
+
# Names are matched case-insensitively (callers lower() before lookup).
|
|
217
|
+
_WRITE_TOOL_CLASS = frozenset({
|
|
218
|
+
"write", "edit", "multiedit", "notebookedit",
|
|
219
|
+
"str_replace", "str_replace_editor", "str_replace_based_edit_tool",
|
|
220
|
+
"create_file", "applypatch", "apply_patch",
|
|
221
|
+
})
|
|
222
|
+
|
|
210
223
|
PROXY_GUARDRAIL_RETRY = os.environ.get("PROXY_GUARDRAIL_RETRY", "on").lower() not in {
|
|
211
224
|
"0",
|
|
212
225
|
"false",
|
|
@@ -224,12 +237,15 @@ PROXY_FINALIZE_CONTINUATION_MAX = int(
|
|
|
224
237
|
PROXY_FINALIZE_SESSION_HARD_CAP = int(
|
|
225
238
|
os.environ.get("PROXY_FINALIZE_SESSION_HARD_CAP", "3")
|
|
226
239
|
)
|
|
227
|
-
# Recon-convergence guardrail: after this many consecutive turns
|
|
228
|
-
#
|
|
229
|
-
#
|
|
230
|
-
#
|
|
231
|
-
#
|
|
240
|
+
# Recon-convergence guardrail: after this many consecutive turns that use
|
|
241
|
+
# tools but produce NO write/deliverable tool call (see _WRITE_TOOL_CLASS),
|
|
242
|
+
# the proxy injects a directive telling the model to stop exploring and
|
|
243
|
+
# produce its deliverable. Targets the failure mode where an agentic recon
|
|
244
|
+
# task explores for hundreds of turns and never converges to the
|
|
232
245
|
# synthesis/write step (observed: 664-turn recon, no deliverable started).
|
|
246
|
+
# Defined as write-tool ABSENCE rather than read-tool presence: a real
|
|
247
|
+
# recon agent explores via Bash/WebFetch/Agent, not just Read/Grep, so a
|
|
248
|
+
# "all tools are recognized read-only" test never accumulates a streak.
|
|
233
249
|
# 0 disables.
|
|
234
250
|
PROXY_RECON_CONVERGENCE_THRESHOLD = int(
|
|
235
251
|
os.environ.get("PROXY_RECON_CONVERGENCE_THRESHOLD", "40")
|
|
@@ -727,7 +743,7 @@ class SessionMonitor:
|
|
|
727
743
|
)
|
|
728
744
|
loop_warnings_emitted: int = 0 # How many loop warnings sent to the model
|
|
729
745
|
no_progress_streak: int = 0 # Forced tool turns without new tool_result
|
|
730
|
-
|
|
746
|
+
consecutive_no_write_turns: int = 0 # turns exploring with no write tool (B1)
|
|
731
747
|
unexpected_end_turn_count: int = 0 # end_turn without tool_use in active loop
|
|
732
748
|
tool_starvation_streak: int = 0 # Consecutive forced turns with no tool_calls produced
|
|
733
749
|
malformed_tool_streak: int = 0 # consecutive malformed pseudo tool payloads
|
|
@@ -885,15 +901,19 @@ class SessionMonitor:
|
|
|
885
901
|
if len(self.tool_call_history) > 30:
|
|
886
902
|
self.tool_call_history = self.tool_call_history[-30:]
|
|
887
903
|
|
|
888
|
-
# Recon-convergence (B1): count consecutive turns
|
|
889
|
-
#
|
|
890
|
-
#
|
|
891
|
-
#
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
904
|
+
# Recon-convergence (B1): count consecutive turns that use tools but
|
|
905
|
+
# produce NO write/deliverable tool call. A turn that uses any write
|
|
906
|
+
# tool resets the streak — that's the model converging from
|
|
907
|
+
# exploration toward synthesis/output. A turn with no tool calls at
|
|
908
|
+
# all is a plain-text turn (neither exploration nor a write) and
|
|
909
|
+
# leaves the streak unchanged. This is the inverse of the old
|
|
910
|
+
# "all tools are recognized read-only" test, which reset on any
|
|
911
|
+
# Bash/WebFetch/Agent turn and so never accumulated for real agents.
|
|
912
|
+
if tool_names:
|
|
913
|
+
if any(n.lower() in _WRITE_TOOL_CLASS for n in tool_names):
|
|
914
|
+
self.consecutive_no_write_turns = 0
|
|
915
|
+
else:
|
|
916
|
+
self.consecutive_no_write_turns += 1
|
|
897
917
|
|
|
898
918
|
# Track read-only tool targets for dedup (Option 3)
|
|
899
919
|
if tool_targets:
|
|
@@ -3268,48 +3288,78 @@ def _resolve_state_machine_tool_choice(
|
|
|
3268
3288
|
return None, "unknown_phase"
|
|
3269
3289
|
|
|
3270
3290
|
|
|
3271
|
-
def _maybe_inject_recon_convergence(
|
|
3272
|
-
|
|
3273
|
-
|
|
3274
|
-
|
|
3275
|
-
|
|
3276
|
-
|
|
3277
|
-
|
|
3278
|
-
|
|
3279
|
-
|
|
3280
|
-
|
|
3281
|
-
|
|
3291
|
+
def _maybe_inject_recon_convergence(
|
|
3292
|
+
openai_body: dict,
|
|
3293
|
+
monitor: "SessionMonitor",
|
|
3294
|
+
full_tools: list[dict] | None = None,
|
|
3295
|
+
) -> None:
|
|
3296
|
+
"""Nudge a session stuck in prolonged exploration toward its deliverable.
|
|
3297
|
+
|
|
3298
|
+
Fires when `consecutive_no_write_turns` crosses
|
|
3299
|
+
PROXY_RECON_CONVERGENCE_THRESHOLD — the model has used tools for many
|
|
3300
|
+
turns without producing any write/deliverable tool call. Targets the
|
|
3301
|
+
observed failure mode of an agentic recon task wandering for hundreds
|
|
3302
|
+
of turns and never converging to the synthesis/write step. Two
|
|
3303
|
+
escalation tiers: a firm "switch to synthesis" directive, then a hard
|
|
3304
|
+
"STOP, write it now" once the streak is 2x over threshold.
|
|
3305
|
+
|
|
3306
|
+
`full_tools` is the request's tool list *before* `_narrow_tools_for_request`
|
|
3307
|
+
pruned it. When the directive fires, any write/deliverable tool that
|
|
3308
|
+
narrowing dropped is re-injected into `openai_body["tools"]` — narrowing
|
|
3309
|
+
scores tools against the (exploration-heavy) recon prompt and runs before
|
|
3310
|
+
this guardrail, so it routinely strips the very write tool the directive
|
|
3311
|
+
tells the model to use, leaving the directive impossible to satisfy.
|
|
3282
3312
|
"""
|
|
3283
3313
|
if PROXY_RECON_CONVERGENCE_THRESHOLD <= 0:
|
|
3284
3314
|
return
|
|
3285
|
-
streak = monitor.
|
|
3315
|
+
streak = monitor.consecutive_no_write_turns
|
|
3286
3316
|
if streak < PROXY_RECON_CONVERGENCE_THRESHOLD:
|
|
3287
3317
|
return
|
|
3288
3318
|
util = monitor.get_utilization()
|
|
3289
3319
|
if streak >= 2 * PROXY_RECON_CONVERGENCE_THRESHOLD:
|
|
3290
3320
|
directive = (
|
|
3291
3321
|
f"STOP exploring. You have run {streak} consecutive turns of "
|
|
3292
|
-
f"
|
|
3293
|
-
"You will NOT finish if you keep
|
|
3294
|
-
"deliverable NOW from the information you already
|
|
3295
|
-
"it to a file with the appropriate tool. Do not
|
|
3322
|
+
f"exploration without producing a deliverable and context is at "
|
|
3323
|
+
f"{util * 100:.0f}%. You will NOT finish if you keep exploring. "
|
|
3324
|
+
"Produce your deliverable NOW from the information you already "
|
|
3325
|
+
"have — write it to a file with the appropriate tool. Do not "
|
|
3326
|
+
"read or run anything else."
|
|
3296
3327
|
)
|
|
3297
3328
|
tier = "hard"
|
|
3298
3329
|
else:
|
|
3299
3330
|
directive = (
|
|
3300
|
-
f"You have
|
|
3331
|
+
f"You have explored for {streak} consecutive turns without "
|
|
3301
3332
|
f"producing a deliverable (context {util * 100:.0f}%). You have "
|
|
3302
3333
|
"enough to begin. Switch from exploration to synthesis: write "
|
|
3303
|
-
"your deliverable now.
|
|
3304
|
-
"strictly required to write it."
|
|
3334
|
+
"your deliverable now. Explore at most one more time, and only "
|
|
3335
|
+
"if strictly required to write it."
|
|
3305
3336
|
)
|
|
3306
3337
|
tier = "firm"
|
|
3307
3338
|
msgs = openai_body.get("messages", [])
|
|
3308
3339
|
msgs.append({"role": "user", "content": directive})
|
|
3309
3340
|
openai_body["messages"] = msgs
|
|
3341
|
+
|
|
3342
|
+
# Re-inject any write/deliverable tool that narrowing dropped, so the
|
|
3343
|
+
# "write your deliverable" directive is actually satisfiable. Without
|
|
3344
|
+
# this the model is told to write but has no write tool to call, picks
|
|
3345
|
+
# another read tool, and the streak climbs unbounded.
|
|
3346
|
+
restored: list[str] = []
|
|
3347
|
+
if full_tools:
|
|
3348
|
+
present = {
|
|
3349
|
+
(t.get("function", {}).get("name", "") or "").lower()
|
|
3350
|
+
for t in openai_body.get("tools", [])
|
|
3351
|
+
}
|
|
3352
|
+
for tool in full_tools:
|
|
3353
|
+
name = (tool.get("function", {}).get("name", "") or "")
|
|
3354
|
+
if name.lower() in _WRITE_TOOL_CLASS and name.lower() not in present:
|
|
3355
|
+
openai_body.setdefault("tools", []).append(tool)
|
|
3356
|
+
present.add(name.lower())
|
|
3357
|
+
restored.append(name)
|
|
3358
|
+
|
|
3310
3359
|
logger.warning(
|
|
3311
|
-
"RECON CONVERGENCE: injected %s directive (
|
|
3312
|
-
|
|
3360
|
+
"RECON CONVERGENCE: injected %s directive (no_write_streak=%d, ctx=%.0f%%, "
|
|
3361
|
+
"restored_write_tools=%s)",
|
|
3362
|
+
tier, streak, util * 100, restored or "none",
|
|
3313
3363
|
)
|
|
3314
3364
|
|
|
3315
3365
|
|
|
@@ -3555,10 +3605,14 @@ def build_openai_request(
|
|
|
3555
3605
|
)
|
|
3556
3606
|
|
|
3557
3607
|
# Convert Anthropic tools to OpenAI function-calling tools
|
|
3608
|
+
full_openai_tools: list[dict] = []
|
|
3558
3609
|
if has_tools:
|
|
3559
3610
|
openai_body["tools"] = _convert_anthropic_tools_to_openai(
|
|
3560
3611
|
anthropic_body.get("tools", [])
|
|
3561
3612
|
)
|
|
3613
|
+
# Keep the full (pre-narrowing) list so the recon-convergence
|
|
3614
|
+
# guardrail can restore a write tool that narrowing dropped.
|
|
3615
|
+
full_openai_tools = openai_body["tools"]
|
|
3562
3616
|
openai_body["tools"] = _narrow_tools_for_request(
|
|
3563
3617
|
anthropic_body, openai_body["tools"]
|
|
3564
3618
|
)
|
|
@@ -3821,9 +3875,10 @@ def build_openai_request(
|
|
|
3821
3875
|
_apply_tool_call_grammar(openai_body, grammar_override=profile_grammar)
|
|
3822
3876
|
|
|
3823
3877
|
# Recon-convergence guardrail (B1) — runs on every built request so a
|
|
3824
|
-
# session wandering in
|
|
3825
|
-
# deliverable regardless of tool-turn phase.
|
|
3826
|
-
|
|
3878
|
+
# session wandering in exploration without producing a write is nudged
|
|
3879
|
+
# toward its deliverable regardless of tool-turn phase. Passed the full
|
|
3880
|
+
# pre-narrowing toolset so it can restore a dropped write tool.
|
|
3881
|
+
_maybe_inject_recon_convergence(openai_body, monitor, full_openai_tools)
|
|
3827
3882
|
|
|
3828
3883
|
return openai_body
|
|
3829
3884
|
|
|
@@ -5375,10 +5375,13 @@ class TestSlotSaveRestore(unittest.TestCase):
|
|
|
5375
5375
|
|
|
5376
5376
|
class TestReconConvergence(unittest.TestCase):
|
|
5377
5377
|
"""Tests for the B1 recon-convergence guardrail — nudges a session
|
|
5378
|
-
stuck
|
|
5378
|
+
stuck exploring without producing a write toward its deliverable.
|
|
5379
5379
|
|
|
5380
|
-
|
|
5381
|
-
|
|
5380
|
+
The streak is defined as write-tool ABSENCE, not read-tool presence: a
|
|
5381
|
+
real recon agent explores via Bash/WebFetch/Agent, so an "all tools are
|
|
5382
|
+
recognized read-only" test never accumulates. Targets the observed
|
|
5383
|
+
failure: a 664-turn agentic recon task that explored for hours and
|
|
5384
|
+
never converged to the synthesis/write step."""
|
|
5382
5385
|
|
|
5383
5386
|
def setUp(self):
|
|
5384
5387
|
self._threshold = proxy.PROXY_RECON_CONVERGENCE_THRESHOLD
|
|
@@ -5387,37 +5390,60 @@ class TestReconConvergence(unittest.TestCase):
|
|
|
5387
5390
|
proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = self._threshold
|
|
5388
5391
|
|
|
5389
5392
|
def test_readonly_turns_increment_the_streak(self):
|
|
5390
|
-
"""Consecutive turns using only read
|
|
5393
|
+
"""Consecutive turns using only read tools grow the streak."""
|
|
5391
5394
|
m = proxy.SessionMonitor(context_window=131072)
|
|
5392
5395
|
for _ in range(5):
|
|
5393
5396
|
m.record_tool_calls(["Read"])
|
|
5394
|
-
self.assertEqual(m.
|
|
5397
|
+
self.assertEqual(m.consecutive_no_write_turns, 5)
|
|
5395
5398
|
m.record_tool_calls(["Grep", "Glob"])
|
|
5396
|
-
self.assertEqual(m.
|
|
5399
|
+
self.assertEqual(m.consecutive_no_write_turns, 6)
|
|
5397
5400
|
|
|
5398
|
-
def
|
|
5401
|
+
def test_bash_and_webfetch_turns_increment_the_streak(self):
|
|
5402
|
+
"""The core fix: exploration via Bash/WebFetch/Agent — tools the old
|
|
5403
|
+
read-only allowlist did not recognize — must grow the streak. The
|
|
5404
|
+
old logic reset on every such turn, so the streak never built."""
|
|
5405
|
+
m = proxy.SessionMonitor(context_window=131072)
|
|
5406
|
+
m.record_tool_calls(["Bash"])
|
|
5407
|
+
m.record_tool_calls(["WebFetch"])
|
|
5408
|
+
m.record_tool_calls(["Agent"])
|
|
5409
|
+
m.record_tool_calls(["Read", "Bash"]) # mixed exploration, no write
|
|
5410
|
+
self.assertEqual(m.consecutive_no_write_turns, 4)
|
|
5411
|
+
|
|
5412
|
+
def test_write_tool_resets_the_streak(self):
|
|
5399
5413
|
"""A turn using a write/edit tool means the model converged toward
|
|
5400
|
-
|
|
5414
|
+
output — the streak resets to 0."""
|
|
5401
5415
|
m = proxy.SessionMonitor(context_window=131072)
|
|
5402
5416
|
for _ in range(10):
|
|
5403
|
-
m.record_tool_calls(["
|
|
5404
|
-
self.assertEqual(m.
|
|
5417
|
+
m.record_tool_calls(["Bash"])
|
|
5418
|
+
self.assertEqual(m.consecutive_no_write_turns, 10)
|
|
5405
5419
|
m.record_tool_calls(["Write"])
|
|
5406
|
-
self.assertEqual(m.
|
|
5420
|
+
self.assertEqual(m.consecutive_no_write_turns, 0)
|
|
5407
5421
|
|
|
5408
5422
|
def test_mixed_turn_with_one_write_resets(self):
|
|
5409
|
-
"""A turn mixing
|
|
5410
|
-
converging — any
|
|
5423
|
+
"""A turn mixing exploration and a write tool still counts as
|
|
5424
|
+
converging — any write tool resets."""
|
|
5411
5425
|
m = proxy.SessionMonitor(context_window=131072)
|
|
5412
5426
|
for _ in range(10):
|
|
5413
5427
|
m.record_tool_calls(["Read"])
|
|
5414
5428
|
m.record_tool_calls(["Read", "Edit"])
|
|
5415
|
-
self.assertEqual(m.
|
|
5429
|
+
self.assertEqual(m.consecutive_no_write_turns, 0)
|
|
5430
|
+
|
|
5431
|
+
def test_no_tool_turn_leaves_streak_unchanged(self):
|
|
5432
|
+
"""A plain-text turn (no tool calls) is neither exploration nor a
|
|
5433
|
+
write — it must leave the streak untouched, not reset it."""
|
|
5434
|
+
m = proxy.SessionMonitor(context_window=131072)
|
|
5435
|
+
for _ in range(7):
|
|
5436
|
+
m.record_tool_calls(["Bash"])
|
|
5437
|
+
self.assertEqual(m.consecutive_no_write_turns, 7)
|
|
5438
|
+
m.record_tool_calls([]) # plain-text turn
|
|
5439
|
+
self.assertEqual(m.consecutive_no_write_turns, 7)
|
|
5440
|
+
m.record_tool_calls(["Read"])
|
|
5441
|
+
self.assertEqual(m.consecutive_no_write_turns, 8)
|
|
5416
5442
|
|
|
5417
5443
|
def test_no_injection_below_threshold(self):
|
|
5418
5444
|
proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
|
|
5419
5445
|
m = proxy.SessionMonitor(context_window=131072)
|
|
5420
|
-
m.
|
|
5446
|
+
m.consecutive_no_write_turns = 39
|
|
5421
5447
|
body = {"messages": [{"role": "user", "content": "go"}]}
|
|
5422
5448
|
proxy._maybe_inject_recon_convergence(body, m)
|
|
5423
5449
|
self.assertEqual(len(body["messages"]), 1)
|
|
@@ -5425,7 +5451,7 @@ class TestReconConvergence(unittest.TestCase):
|
|
|
5425
5451
|
def test_firm_directive_at_threshold(self):
|
|
5426
5452
|
proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
|
|
5427
5453
|
m = proxy.SessionMonitor(context_window=131072)
|
|
5428
|
-
m.
|
|
5454
|
+
m.consecutive_no_write_turns = 45
|
|
5429
5455
|
m.last_input_tokens = 120000
|
|
5430
5456
|
body = {"messages": [{"role": "user", "content": "go"}]}
|
|
5431
5457
|
proxy._maybe_inject_recon_convergence(body, m)
|
|
@@ -5438,7 +5464,7 @@ class TestReconConvergence(unittest.TestCase):
|
|
|
5438
5464
|
"""Once the streak is 2x over threshold, escalate to a hard STOP."""
|
|
5439
5465
|
proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
|
|
5440
5466
|
m = proxy.SessionMonitor(context_window=131072)
|
|
5441
|
-
m.
|
|
5467
|
+
m.consecutive_no_write_turns = 80
|
|
5442
5468
|
m.last_input_tokens = 250000 # over budget — the real-incident shape
|
|
5443
5469
|
body = {"messages": [{"role": "user", "content": "go"}]}
|
|
5444
5470
|
proxy._maybe_inject_recon_convergence(body, m)
|
|
@@ -5448,11 +5474,89 @@ class TestReconConvergence(unittest.TestCase):
|
|
|
5448
5474
|
def test_disabled_when_threshold_zero(self):
|
|
5449
5475
|
proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 0
|
|
5450
5476
|
m = proxy.SessionMonitor(context_window=131072)
|
|
5451
|
-
m.
|
|
5477
|
+
m.consecutive_no_write_turns = 500
|
|
5452
5478
|
body = {"messages": [{"role": "user", "content": "go"}]}
|
|
5453
5479
|
proxy._maybe_inject_recon_convergence(body, m)
|
|
5454
5480
|
self.assertEqual(len(body["messages"]), 1)
|
|
5455
5481
|
|
|
5482
|
+
@staticmethod
|
|
5483
|
+
def _tool(name: str) -> dict:
|
|
5484
|
+
return {"type": "function", "function": {"name": name, "description": f"{name} tool"}}
|
|
5485
|
+
|
|
5486
|
+
def test_dropped_write_tool_is_restored_when_directive_fires(self):
|
|
5487
|
+
"""The core fix: if narrowing left no write tool in the request,
|
|
5488
|
+
a firing directive re-injects it from the full pre-narrowing set."""
|
|
5489
|
+
proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
|
|
5490
|
+
m = proxy.SessionMonitor(context_window=131072)
|
|
5491
|
+
m.consecutive_no_write_turns = 45
|
|
5492
|
+
# narrowed toolset — exploration tools only, no write tool
|
|
5493
|
+
body = {
|
|
5494
|
+
"messages": [{"role": "user", "content": "go"}],
|
|
5495
|
+
"tools": [self._tool("Read"), self._tool("Grep"), self._tool("Bash")],
|
|
5496
|
+
}
|
|
5497
|
+
# full pre-narrowing set DID include a write tool
|
|
5498
|
+
full = body["tools"] + [self._tool("Edit")]
|
|
5499
|
+
proxy._maybe_inject_recon_convergence(body, m, full)
|
|
5500
|
+
names = [t["function"]["name"] for t in body["tools"]]
|
|
5501
|
+
self.assertIn("Edit", names)
|
|
5502
|
+
|
|
5503
|
+
def test_present_write_tool_not_duplicated(self):
|
|
5504
|
+
"""If a write tool already survived narrowing, it is not added twice."""
|
|
5505
|
+
proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
|
|
5506
|
+
m = proxy.SessionMonitor(context_window=131072)
|
|
5507
|
+
m.consecutive_no_write_turns = 45
|
|
5508
|
+
body = {
|
|
5509
|
+
"messages": [{"role": "user", "content": "go"}],
|
|
5510
|
+
"tools": [self._tool("Read"), self._tool("Edit")],
|
|
5511
|
+
}
|
|
5512
|
+
full = list(body["tools"])
|
|
5513
|
+
proxy._maybe_inject_recon_convergence(body, m, full)
|
|
5514
|
+
names = [t["function"]["name"] for t in body["tools"]]
|
|
5515
|
+
self.assertEqual(names.count("Edit"), 1)
|
|
5516
|
+
|
|
5517
|
+
def test_no_write_tool_anywhere_is_safe(self):
|
|
5518
|
+
"""A recon agent whose toolset has no write tool at all: nothing to
|
|
5519
|
+
restore, no crash."""
|
|
5520
|
+
proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
|
|
5521
|
+
m = proxy.SessionMonitor(context_window=131072)
|
|
5522
|
+
m.consecutive_no_write_turns = 45
|
|
5523
|
+
body = {
|
|
5524
|
+
"messages": [{"role": "user", "content": "go"}],
|
|
5525
|
+
"tools": [self._tool("Read"), self._tool("Bash")],
|
|
5526
|
+
}
|
|
5527
|
+
proxy._maybe_inject_recon_convergence(body, m, list(body["tools"]))
|
|
5528
|
+
names = [t["function"]["name"] for t in body["tools"]]
|
|
5529
|
+
self.assertEqual(names, ["Read", "Bash"])
|
|
5530
|
+
|
|
5531
|
+
def test_full_tools_omitted_is_safe(self):
|
|
5532
|
+
"""Called without full_tools (default None) — directive still fires,
|
|
5533
|
+
no tool restoration attempted, no crash."""
|
|
5534
|
+
proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
|
|
5535
|
+
m = proxy.SessionMonitor(context_window=131072)
|
|
5536
|
+
m.consecutive_no_write_turns = 45
|
|
5537
|
+
body = {
|
|
5538
|
+
"messages": [{"role": "user", "content": "go"}],
|
|
5539
|
+
"tools": [self._tool("Read")],
|
|
5540
|
+
}
|
|
5541
|
+
proxy._maybe_inject_recon_convergence(body, m)
|
|
5542
|
+
self.assertEqual(len(body["messages"]), 2)
|
|
5543
|
+
self.assertEqual([t["function"]["name"] for t in body["tools"]], ["Read"])
|
|
5544
|
+
|
|
5545
|
+
def test_no_restore_below_threshold(self):
|
|
5546
|
+
"""Below threshold the directive does not fire, so no write tool is
|
|
5547
|
+
restored even if narrowing dropped one."""
|
|
5548
|
+
proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
|
|
5549
|
+
m = proxy.SessionMonitor(context_window=131072)
|
|
5550
|
+
m.consecutive_no_write_turns = 39
|
|
5551
|
+
body = {
|
|
5552
|
+
"messages": [{"role": "user", "content": "go"}],
|
|
5553
|
+
"tools": [self._tool("Read")],
|
|
5554
|
+
}
|
|
5555
|
+
full = body["tools"] + [self._tool("Write")]
|
|
5556
|
+
proxy._maybe_inject_recon_convergence(body, m, full)
|
|
5557
|
+
names = [t["function"]["name"] for t in body["tools"]]
|
|
5558
|
+
self.assertEqual(names, ["Read"])
|
|
5559
|
+
|
|
5456
5560
|
|
|
5457
5561
|
class TestPrunerRework(unittest.TestCase):
|
|
5458
5562
|
"""Tests for the reworked context pruner (B2 + B3): contiguous
|