@miller-tech/uap 1.20.49 → 1.20.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@miller-tech/uap",
3
- "version": "1.20.49",
3
+ "version": "1.20.51",
4
4
  "description": "Autonomous AI agent memory system with CLAUDE.md protocol enforcement",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -207,6 +207,19 @@ _READ_ONLY_TOOL_CLASS = frozenset({
207
207
  "search", "Search", "list_files", "ListFiles",
208
208
  })
209
209
 
210
+ # Tools that produce or mutate a deliverable. Using any of these in a turn
211
+ # means the agent is converging from exploration toward output, and resets
212
+ # the recon-convergence streak (B1). This is deliberately a SHORT allowlist
213
+ # of write tools, NOT a read-only denylist: exploration happens through an
214
+ # open-ended set of tools (Bash, WebFetch, Agent, ...) that cannot be
215
+ # enumerated, but "the agent produced a write" is a small, stable signal.
216
+ # Names are matched case-insensitively (callers lower() before lookup).
217
+ _WRITE_TOOL_CLASS = frozenset({
218
+ "write", "edit", "multiedit", "notebookedit",
219
+ "str_replace", "str_replace_editor", "str_replace_based_edit_tool",
220
+ "create_file", "applypatch", "apply_patch",
221
+ })
222
+
210
223
  PROXY_GUARDRAIL_RETRY = os.environ.get("PROXY_GUARDRAIL_RETRY", "on").lower() not in {
211
224
  "0",
212
225
  "false",
@@ -224,12 +237,15 @@ PROXY_FINALIZE_CONTINUATION_MAX = int(
224
237
  PROXY_FINALIZE_SESSION_HARD_CAP = int(
225
238
  os.environ.get("PROXY_FINALIZE_SESSION_HARD_CAP", "3")
226
239
  )
227
- # Recon-convergence guardrail: after this many consecutive turns of PURE
228
- # read-only exploration (Read/Grep/Glob/etc. no write/edit/deliverable
229
- # tool), the proxy injects a directive telling the model to stop exploring
230
- # and produce its deliverable. Targets the failure mode where an agentic
231
- # recon task reads files for hundreds of turns and never converges to the
240
+ # Recon-convergence guardrail: after this many consecutive turns that use
241
+ # tools but produce NO write/deliverable tool call (see _WRITE_TOOL_CLASS),
242
+ # the proxy injects a directive telling the model to stop exploring and
243
+ # produce its deliverable. Targets the failure mode where an agentic recon
244
+ # task explores for hundreds of turns and never converges to the
232
245
  # synthesis/write step (observed: 664-turn recon, no deliverable started).
246
+ # Defined as write-tool ABSENCE rather than read-tool presence: a real
247
+ # recon agent explores via Bash/WebFetch/Agent, not just Read/Grep, so an
248
+ # "all tools are recognized read-only" test never accumulates a streak.
233
249
  # 0 disables.
234
250
  PROXY_RECON_CONVERGENCE_THRESHOLD = int(
235
251
  os.environ.get("PROXY_RECON_CONVERGENCE_THRESHOLD", "40")
@@ -727,7 +743,7 @@ class SessionMonitor:
727
743
  )
728
744
  loop_warnings_emitted: int = 0 # How many loop warnings sent to the model
729
745
  no_progress_streak: int = 0 # Forced tool turns without new tool_result
730
- consecutive_readonly_turns: int = 0 # turns of pure read-only exploration (B1)
746
+ consecutive_no_write_turns: int = 0 # turns exploring with no write tool (B1)
731
747
  unexpected_end_turn_count: int = 0 # end_turn without tool_use in active loop
732
748
  tool_starvation_streak: int = 0 # Consecutive forced turns with no tool_calls produced
733
749
  malformed_tool_streak: int = 0 # consecutive malformed pseudo tool payloads
@@ -885,15 +901,19 @@ class SessionMonitor:
885
901
  if len(self.tool_call_history) > 30:
886
902
  self.tool_call_history = self.tool_call_history[-30:]
887
903
 
888
- # Recon-convergence (B1): count consecutive turns of PURE read-only
889
- # exploration. A turn that uses any non-read-only tool (write, edit,
890
- # a deliverable tool) resets the streak — that's the model
891
- # converging from exploration toward synthesis/action.
892
- _ro = {n.lower() for n in _READ_ONLY_TOOL_CLASS}
893
- if tool_names and all(n.lower() in _ro for n in tool_names):
894
- self.consecutive_readonly_turns += 1
895
- else:
896
- self.consecutive_readonly_turns = 0
904
+ # Recon-convergence (B1): count consecutive turns that use tools but
905
+ # produce NO write/deliverable tool call. A turn that uses any write
906
+ # tool resets the streak — that's the model converging from
907
+ # exploration toward synthesis/output. A turn with no tool calls at
908
+ # all is a plain-text turn (neither exploration nor a write) and
909
+ # leaves the streak unchanged. This is the inverse of the old
910
+ # "all tools are recognized read-only" test, which reset on any
911
+ # Bash/WebFetch/Agent turn and so never accumulated for real agents.
912
+ if tool_names:
913
+ if any(n.lower() in _WRITE_TOOL_CLASS for n in tool_names):
914
+ self.consecutive_no_write_turns = 0
915
+ else:
916
+ self.consecutive_no_write_turns += 1
897
917
 
898
918
  # Track read-only tool targets for dedup (Option 3)
899
919
  if tool_targets:
@@ -3268,48 +3288,78 @@ def _resolve_state_machine_tool_choice(
3268
3288
  return None, "unknown_phase"
3269
3289
 
3270
3290
 
3271
- def _maybe_inject_recon_convergence(openai_body: dict, monitor: "SessionMonitor") -> None:
3272
- """Nudge a session stuck in prolonged read-only exploration toward its
3273
- deliverable.
3274
-
3275
- Fires when `consecutive_readonly_turns` crosses
3276
- PROXY_RECON_CONVERGENCE_THRESHOLD the model has read files for many
3277
- turns without writing anything. Targets the observed failure mode of
3278
- an agentic recon task wandering for hundreds of turns and never
3279
- converging to the synthesis/write step. Two escalation tiers: a firm
3280
- "switch to synthesis" directive, then a hard "STOP, write it now" once
3281
- the streak is 2x over threshold.
3291
+ def _maybe_inject_recon_convergence(
3292
+ openai_body: dict,
3293
+ monitor: "SessionMonitor",
3294
+ full_tools: list[dict] | None = None,
3295
+ ) -> None:
3296
+ """Nudge a session stuck in prolonged exploration toward its deliverable.
3297
+
3298
+ Fires when `consecutive_no_write_turns` crosses
3299
+ PROXY_RECON_CONVERGENCE_THRESHOLD — the model has used tools for many
3300
+ turns without producing any write/deliverable tool call. Targets the
3301
+ observed failure mode of an agentic recon task wandering for hundreds
3302
+ of turns and never converging to the synthesis/write step. Two
3303
+ escalation tiers: a firm "switch to synthesis" directive, then a hard
3304
+ "STOP, write it now" once the streak is 2x over threshold.
3305
+
3306
+ `full_tools` is the request's tool list *before* `_narrow_tools_for_request`
3307
+ pruned it. When the directive fires, any write/deliverable tool that
3308
+ narrowing dropped is re-injected into `openai_body["tools"]` — narrowing
3309
+ scores tools against the (exploration-heavy) recon prompt and runs before
3310
+ this guardrail, so it routinely strips the very write tool the directive
3311
+ tells the model to use, leaving the directive impossible to satisfy.
3282
3312
  """
3283
3313
  if PROXY_RECON_CONVERGENCE_THRESHOLD <= 0:
3284
3314
  return
3285
- streak = monitor.consecutive_readonly_turns
3315
+ streak = monitor.consecutive_no_write_turns
3286
3316
  if streak < PROXY_RECON_CONVERGENCE_THRESHOLD:
3287
3317
  return
3288
3318
  util = monitor.get_utilization()
3289
3319
  if streak >= 2 * PROXY_RECON_CONVERGENCE_THRESHOLD:
3290
3320
  directive = (
3291
3321
  f"STOP exploring. You have run {streak} consecutive turns of "
3292
- f"read-only exploration and context is at {util * 100:.0f}%. "
3293
- "You will NOT finish if you keep reading files. Produce your "
3294
- "deliverable NOW from the information you already have — write "
3295
- "it to a file with the appropriate tool. Do not read anything else."
3322
+ f"exploration without producing a deliverable and context is at "
3323
+ f"{util * 100:.0f}%. You will NOT finish if you keep exploring. "
3324
+ "Produce your deliverable NOW from the information you already "
3325
+ "have — write it to a file with the appropriate tool. Do not "
3326
+ "read or run anything else."
3296
3327
  )
3297
3328
  tier = "hard"
3298
3329
  else:
3299
3330
  directive = (
3300
- f"You have read files for {streak} consecutive turns without "
3331
+ f"You have explored for {streak} consecutive turns without "
3301
3332
  f"producing a deliverable (context {util * 100:.0f}%). You have "
3302
3333
  "enough to begin. Switch from exploration to synthesis: write "
3303
- "your deliverable now. Read at most one more file, and only if "
3304
- "strictly required to write it."
3334
+ "your deliverable now. Explore at most one more time, and only "
3335
+ "if strictly required to write it."
3305
3336
  )
3306
3337
  tier = "firm"
3307
3338
  msgs = openai_body.get("messages", [])
3308
3339
  msgs.append({"role": "user", "content": directive})
3309
3340
  openai_body["messages"] = msgs
3341
+
3342
+ # Re-inject any write/deliverable tool that narrowing dropped, so the
3343
+ # "write your deliverable" directive is actually satisfiable. Without
3344
+ # this the model is told to write but has no write tool to call, picks
3345
+ # another read tool, and the streak climbs unbounded.
3346
+ restored: list[str] = []
3347
+ if full_tools:
3348
+ present = {
3349
+ (t.get("function", {}).get("name", "") or "").lower()
3350
+ for t in openai_body.get("tools", [])
3351
+ }
3352
+ for tool in full_tools:
3353
+ name = (tool.get("function", {}).get("name", "") or "")
3354
+ if name.lower() in _WRITE_TOOL_CLASS and name.lower() not in present:
3355
+ openai_body.setdefault("tools", []).append(tool)
3356
+ present.add(name.lower())
3357
+ restored.append(name)
3358
+
3310
3359
  logger.warning(
3311
- "RECON CONVERGENCE: injected %s directive (readonly_streak=%d, ctx=%.0f%%)",
3312
- tier, streak, util * 100,
3360
+ "RECON CONVERGENCE: injected %s directive (no_write_streak=%d, ctx=%.0f%%, "
3361
+ "restored_write_tools=%s)",
3362
+ tier, streak, util * 100, restored or "none",
3313
3363
  )
3314
3364
 
3315
3365
 
@@ -3555,10 +3605,14 @@ def build_openai_request(
3555
3605
  )
3556
3606
 
3557
3607
  # Convert Anthropic tools to OpenAI function-calling tools
3608
+ full_openai_tools: list[dict] = []
3558
3609
  if has_tools:
3559
3610
  openai_body["tools"] = _convert_anthropic_tools_to_openai(
3560
3611
  anthropic_body.get("tools", [])
3561
3612
  )
3613
+ # Keep the full (pre-narrowing) list so the recon-convergence
3614
+ # guardrail can restore a write tool that narrowing dropped.
3615
+ full_openai_tools = openai_body["tools"]
3562
3616
  openai_body["tools"] = _narrow_tools_for_request(
3563
3617
  anthropic_body, openai_body["tools"]
3564
3618
  )
@@ -3821,9 +3875,10 @@ def build_openai_request(
3821
3875
  _apply_tool_call_grammar(openai_body, grammar_override=profile_grammar)
3822
3876
 
3823
3877
  # Recon-convergence guardrail (B1) — runs on every built request so a
3824
- # session wandering in read-only exploration is nudged toward its
3825
- # deliverable regardless of tool-turn phase.
3826
- _maybe_inject_recon_convergence(openai_body, monitor)
3878
+ # session wandering in exploration without producing a write is nudged
3879
+ # toward its deliverable regardless of tool-turn phase. Passed the full
3880
+ # pre-narrowing toolset so it can restore a dropped write tool.
3881
+ _maybe_inject_recon_convergence(openai_body, monitor, full_openai_tools)
3827
3882
 
3828
3883
  return openai_body
3829
3884
 
@@ -5375,10 +5375,13 @@ class TestSlotSaveRestore(unittest.TestCase):
5375
5375
 
5376
5376
  class TestReconConvergence(unittest.TestCase):
5377
5377
  """Tests for the B1 recon-convergence guardrail — nudges a session
5378
- stuck doing read-only exploration toward producing its deliverable.
5378
+ stuck exploring without producing a write toward its deliverable.
5379
5379
 
5380
- Targets the observed failure: a 664-turn agentic recon task that read
5381
- files for hours and never converged to the synthesis/write step."""
5380
+ The streak is defined as write-tool ABSENCE, not read-tool presence: a
5381
+ real recon agent explores via Bash/WebFetch/Agent, so an "all tools are
5382
+ recognized read-only" test never accumulates. Targets the observed
5383
+ failure: a 664-turn agentic recon task that explored for hours and
5384
+ never converged to the synthesis/write step."""
5382
5385
 
5383
5386
  def setUp(self):
5384
5387
  self._threshold = proxy.PROXY_RECON_CONVERGENCE_THRESHOLD
@@ -5387,37 +5390,60 @@ class TestReconConvergence(unittest.TestCase):
5387
5390
  proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = self._threshold
5388
5391
 
5389
5392
  def test_readonly_turns_increment_the_streak(self):
5390
- """Consecutive turns using only read-only tools grow the streak."""
5393
+ """Consecutive turns using only read tools grow the streak."""
5391
5394
  m = proxy.SessionMonitor(context_window=131072)
5392
5395
  for _ in range(5):
5393
5396
  m.record_tool_calls(["Read"])
5394
- self.assertEqual(m.consecutive_readonly_turns, 5)
5397
+ self.assertEqual(m.consecutive_no_write_turns, 5)
5395
5398
  m.record_tool_calls(["Grep", "Glob"])
5396
- self.assertEqual(m.consecutive_readonly_turns, 6)
5399
+ self.assertEqual(m.consecutive_no_write_turns, 6)
5397
5400
 
5398
- def test_non_readonly_tool_resets_the_streak(self):
5401
+ def test_bash_and_webfetch_turns_increment_the_streak(self):
5402
+ """The core fix: exploration via Bash/WebFetch/Agent — tools the old
5403
+ read-only allowlist did not recognize — must grow the streak. The
5404
+ old logic reset on every such turn, so the streak never built."""
5405
+ m = proxy.SessionMonitor(context_window=131072)
5406
+ m.record_tool_calls(["Bash"])
5407
+ m.record_tool_calls(["WebFetch"])
5408
+ m.record_tool_calls(["Agent"])
5409
+ m.record_tool_calls(["Read", "Bash"]) # mixed exploration, no write
5410
+ self.assertEqual(m.consecutive_no_write_turns, 4)
5411
+
5412
+ def test_write_tool_resets_the_streak(self):
5399
5413
  """A turn using a write/edit tool means the model converged toward
5400
- action — the streak resets to 0."""
5414
+ output — the streak resets to 0."""
5401
5415
  m = proxy.SessionMonitor(context_window=131072)
5402
5416
  for _ in range(10):
5403
- m.record_tool_calls(["Read"])
5404
- self.assertEqual(m.consecutive_readonly_turns, 10)
5417
+ m.record_tool_calls(["Bash"])
5418
+ self.assertEqual(m.consecutive_no_write_turns, 10)
5405
5419
  m.record_tool_calls(["Write"])
5406
- self.assertEqual(m.consecutive_readonly_turns, 0)
5420
+ self.assertEqual(m.consecutive_no_write_turns, 0)
5407
5421
 
5408
5422
  def test_mixed_turn_with_one_write_resets(self):
5409
- """A turn mixing read-only and a write tool still counts as
5410
- converging — any non-read-only tool resets."""
5423
+ """A turn mixing exploration and a write tool still counts as
5424
+ converging — any write tool resets."""
5411
5425
  m = proxy.SessionMonitor(context_window=131072)
5412
5426
  for _ in range(10):
5413
5427
  m.record_tool_calls(["Read"])
5414
5428
  m.record_tool_calls(["Read", "Edit"])
5415
- self.assertEqual(m.consecutive_readonly_turns, 0)
5429
+ self.assertEqual(m.consecutive_no_write_turns, 0)
5430
+
5431
+ def test_no_tool_turn_leaves_streak_unchanged(self):
5432
+ """A plain-text turn (no tool calls) is neither exploration nor a
5433
+ write — it must leave the streak untouched, not reset it."""
5434
+ m = proxy.SessionMonitor(context_window=131072)
5435
+ for _ in range(7):
5436
+ m.record_tool_calls(["Bash"])
5437
+ self.assertEqual(m.consecutive_no_write_turns, 7)
5438
+ m.record_tool_calls([]) # plain-text turn
5439
+ self.assertEqual(m.consecutive_no_write_turns, 7)
5440
+ m.record_tool_calls(["Read"])
5441
+ self.assertEqual(m.consecutive_no_write_turns, 8)
5416
5442
 
5417
5443
  def test_no_injection_below_threshold(self):
5418
5444
  proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
5419
5445
  m = proxy.SessionMonitor(context_window=131072)
5420
- m.consecutive_readonly_turns = 39
5446
+ m.consecutive_no_write_turns = 39
5421
5447
  body = {"messages": [{"role": "user", "content": "go"}]}
5422
5448
  proxy._maybe_inject_recon_convergence(body, m)
5423
5449
  self.assertEqual(len(body["messages"]), 1)
@@ -5425,7 +5451,7 @@ class TestReconConvergence(unittest.TestCase):
5425
5451
  def test_firm_directive_at_threshold(self):
5426
5452
  proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
5427
5453
  m = proxy.SessionMonitor(context_window=131072)
5428
- m.consecutive_readonly_turns = 45
5454
+ m.consecutive_no_write_turns = 45
5429
5455
  m.last_input_tokens = 120000
5430
5456
  body = {"messages": [{"role": "user", "content": "go"}]}
5431
5457
  proxy._maybe_inject_recon_convergence(body, m)
@@ -5438,7 +5464,7 @@ class TestReconConvergence(unittest.TestCase):
5438
5464
  """Once the streak is 2x over threshold, escalate to a hard STOP."""
5439
5465
  proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
5440
5466
  m = proxy.SessionMonitor(context_window=131072)
5441
- m.consecutive_readonly_turns = 80
5467
+ m.consecutive_no_write_turns = 80
5442
5468
  m.last_input_tokens = 250000 # over budget — the real-incident shape
5443
5469
  body = {"messages": [{"role": "user", "content": "go"}]}
5444
5470
  proxy._maybe_inject_recon_convergence(body, m)
@@ -5448,11 +5474,89 @@ class TestReconConvergence(unittest.TestCase):
5448
5474
  def test_disabled_when_threshold_zero(self):
5449
5475
  proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 0
5450
5476
  m = proxy.SessionMonitor(context_window=131072)
5451
- m.consecutive_readonly_turns = 500
5477
+ m.consecutive_no_write_turns = 500
5452
5478
  body = {"messages": [{"role": "user", "content": "go"}]}
5453
5479
  proxy._maybe_inject_recon_convergence(body, m)
5454
5480
  self.assertEqual(len(body["messages"]), 1)
5455
5481
 
5482
+ @staticmethod
5483
+ def _tool(name: str) -> dict:
5484
+ return {"type": "function", "function": {"name": name, "description": f"{name} tool"}}
5485
+
5486
+ def test_dropped_write_tool_is_restored_when_directive_fires(self):
5487
+ """The core fix: if narrowing left no write tool in the request,
5488
+ a firing directive re-injects it from the full pre-narrowing set."""
5489
+ proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
5490
+ m = proxy.SessionMonitor(context_window=131072)
5491
+ m.consecutive_no_write_turns = 45
5492
+ # narrowed toolset — exploration tools only, no write tool
5493
+ body = {
5494
+ "messages": [{"role": "user", "content": "go"}],
5495
+ "tools": [self._tool("Read"), self._tool("Grep"), self._tool("Bash")],
5496
+ }
5497
+ # full pre-narrowing set DID include a write tool
5498
+ full = body["tools"] + [self._tool("Edit")]
5499
+ proxy._maybe_inject_recon_convergence(body, m, full)
5500
+ names = [t["function"]["name"] for t in body["tools"]]
5501
+ self.assertIn("Edit", names)
5502
+
5503
+ def test_present_write_tool_not_duplicated(self):
5504
+ """If a write tool already survived narrowing, it is not added twice."""
5505
+ proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
5506
+ m = proxy.SessionMonitor(context_window=131072)
5507
+ m.consecutive_no_write_turns = 45
5508
+ body = {
5509
+ "messages": [{"role": "user", "content": "go"}],
5510
+ "tools": [self._tool("Read"), self._tool("Edit")],
5511
+ }
5512
+ full = list(body["tools"])
5513
+ proxy._maybe_inject_recon_convergence(body, m, full)
5514
+ names = [t["function"]["name"] for t in body["tools"]]
5515
+ self.assertEqual(names.count("Edit"), 1)
5516
+
5517
+ def test_no_write_tool_anywhere_is_safe(self):
5518
+ """A recon agent whose toolset has no write tool at all: nothing to
5519
+ restore, no crash."""
5520
+ proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
5521
+ m = proxy.SessionMonitor(context_window=131072)
5522
+ m.consecutive_no_write_turns = 45
5523
+ body = {
5524
+ "messages": [{"role": "user", "content": "go"}],
5525
+ "tools": [self._tool("Read"), self._tool("Bash")],
5526
+ }
5527
+ proxy._maybe_inject_recon_convergence(body, m, list(body["tools"]))
5528
+ names = [t["function"]["name"] for t in body["tools"]]
5529
+ self.assertEqual(names, ["Read", "Bash"])
5530
+
5531
+ def test_full_tools_omitted_is_safe(self):
5532
+ """Called without full_tools (default None) — directive still fires,
5533
+ no tool restoration attempted, no crash."""
5534
+ proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
5535
+ m = proxy.SessionMonitor(context_window=131072)
5536
+ m.consecutive_no_write_turns = 45
5537
+ body = {
5538
+ "messages": [{"role": "user", "content": "go"}],
5539
+ "tools": [self._tool("Read")],
5540
+ }
5541
+ proxy._maybe_inject_recon_convergence(body, m)
5542
+ self.assertEqual(len(body["messages"]), 2)
5543
+ self.assertEqual([t["function"]["name"] for t in body["tools"]], ["Read"])
5544
+
5545
+ def test_no_restore_below_threshold(self):
5546
+ """Below threshold the directive does not fire, so no write tool is
5547
+ restored even if narrowing dropped one."""
5548
+ proxy.PROXY_RECON_CONVERGENCE_THRESHOLD = 40
5549
+ m = proxy.SessionMonitor(context_window=131072)
5550
+ m.consecutive_no_write_turns = 39
5551
+ body = {
5552
+ "messages": [{"role": "user", "content": "go"}],
5553
+ "tools": [self._tool("Read")],
5554
+ }
5555
+ full = body["tools"] + [self._tool("Write")]
5556
+ proxy._maybe_inject_recon_convergence(body, m, full)
5557
+ names = [t["function"]["name"] for t in body["tools"]]
5558
+ self.assertEqual(names, ["Read"])
5559
+
5456
5560
 
5457
5561
  class TestPrunerRework(unittest.TestCase):
5458
5562
  """Tests for the reworked context pruner (B2 + B3): contiguous