@miller-tech/uap 1.15.1 → 1.15.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,8 @@
1
1
  #!/usr/bin/env python3
2
2
 
3
+ import asyncio
3
4
  import importlib.util
5
+ import json
4
6
  import unittest
5
7
  from pathlib import Path
6
8
 
@@ -17,6 +19,27 @@ def _load_proxy_module():
17
19
  proxy = _load_proxy_module()
18
20
 
19
21
 
22
class _FakeResponse:
    """Canned response object exposing ``status_code`` and ``json()``.

    The payload passed at construction is stored as-is and handed back,
    without copying, by ``json()``.
    """

    def __init__(self, payload, status_code=200):
        self.status_code = status_code
        self._payload = payload

    def json(self):
        """Return the exact payload object supplied to the constructor."""
        return self._payload
29
+
30
+
31
class _FakeClient:
    """Async client double that replays queued responses in FIFO order.

    Every ``post`` call is recorded in ``requests`` as a dict with the
    positional arguments under ``"args"`` and the keyword arguments under
    ``"kwargs"``.
    """

    def __init__(self, responses):
        # Copy the iterable so the caller's sequence is never consumed.
        self._responses = [*responses]
        self.requests = []

    async def post(self, *args, **kwargs):
        """Record the call, then return the next queued response.

        Raises:
            AssertionError: if no response is left in the queue. The call is
                still recorded before the error is raised.
        """
        call_record = {"args": args, "kwargs": kwargs}
        self.requests.append(call_record)
        if self._responses:
            return self._responses.pop(0)
        raise AssertionError("No fake response queued")
41
+
42
+
20
43
  class TestStreamingReasoningFallback(unittest.TestCase):
21
44
  def test_fallback_disabled_returns_none(self):
22
45
  text = proxy._build_reasoning_fallback_text(
@@ -185,6 +208,14 @@ class TestMalformedToolGuardrail(unittest.TestCase):
185
208
  }
186
209
  self.assertTrue(proxy._is_malformed_tool_response(openai_resp, anthropic_body))
187
210
 
211
def test_tool_call_apology_helper_detects_phrase(self):
    # The helper must report True for the canned apology sentence pair and
    # False for an ordinary assistant reply.
    detect = proxy._contains_tool_call_apology
    canned_apology = (
        "I could not produce a valid tool-call format in this turn. "
        "Please continue; I will issue exactly one valid tool call next."
    )
    self.assertTrue(detect(canned_apology))
    self.assertFalse(detect("normal assistant response"))
218
+
188
219
  def test_clean_tool_call_response_is_not_malformed(self):
189
220
  openai_resp = {
190
221
  "choices": [
@@ -442,6 +473,505 @@ class TestMalformedToolGuardrail(unittest.TestCase):
442
473
  self.assertIn("Please retry the same request", text)
443
474
  self.assertNotIn("I will issue exactly one valid tool call next", text)
444
475
 
476
def test_openai_to_anthropic_response_sanitizes_tool_call_apology(self):
    # A "stop" completion whose text is the canned apology must be converted
    # into a retry prompt; the apology wording must not survive conversion.
    apology = (
        "I could not produce a valid tool-call format in this turn. "
        "Please continue; I will issue exactly one valid tool call next."
    )
    upstream_message = {"content": apology, "tool_calls": []}
    openai_resp = {
        "choices": [{"finish_reason": "stop", "message": upstream_message}]
    }

    anthropic_resp = proxy.openai_to_anthropic_response(openai_resp, "test-model")
    first_block_text = anthropic_resp["content"][0]["text"]
    self.assertIn("Please retry the same request", first_block_text)
    self.assertNotIn(
        "I will issue exactly one valid tool call next", first_block_text
    )
496
+
497
def test_preflight_flags_invalid_json_tool_arguments(self):
    # The tool call carries truncated JSON ('{"cron":') as its arguments; the
    # classifier is expected to label the response a malformed payload.
    truncated_call = {
        "id": "call_1",
        "function": {"name": "ScheduleJob", "arguments": '{"cron":'},
    }
    upstream_message = {"content": "", "tool_calls": [truncated_call]}
    openai_resp = {
        "choices": [{"finish_reason": "tool_calls", "message": upstream_message}]
    }

    schedule_schema = {
        "type": "object",
        "required": ["cron"],
        "properties": {"cron": {"type": "string", "minLength": 1}},
    }
    anthropic_body = {
        "tools": [{"name": "ScheduleJob", "input_schema": schedule_schema}]
    }

    verdict = proxy._classify_tool_response_issue(openai_resp, anthropic_body)
    self.assertEqual(verdict.kind, "malformed_payload")
    self.assertIn("malformed pseudo tool payload", verdict.reason)
533
+
534
def test_preflight_flags_empty_required_field(self):
    # "cron" is required with minLength 1 but arrives as an empty string; the
    # classifier is expected to label the response a malformed payload.
    call_with_empty_cron = {
        "id": "call_1",
        "function": {
            "name": "ScheduleJob",
            "arguments": '{"cron":"","command":"echo hi"}',
        },
    }
    upstream_message = {"content": "", "tool_calls": [call_with_empty_cron]}
    openai_resp = {
        "choices": [{"finish_reason": "tool_calls", "message": upstream_message}]
    }

    schedule_schema = {
        "type": "object",
        "required": ["cron", "command"],
        "properties": {
            "cron": {"type": "string", "minLength": 1},
            "command": {"type": "string", "minLength": 1},
        },
    }
    anthropic_body = {
        "tools": [{"name": "ScheduleJob", "input_schema": schedule_schema}]
    }

    verdict = proxy._classify_tool_response_issue(openai_resp, anthropic_body)
    self.assertEqual(verdict.kind, "malformed_payload")
    self.assertIn("malformed pseudo tool payload", verdict.reason)
573
+
574
def test_preflight_flags_markup_inside_arguments(self):
    # The arguments are valid JSON, but "command" embeds <parameter> markup;
    # the classifier is expected to label the response a malformed payload.
    call_with_markup = {
        "id": "call_1",
        "function": {
            "name": "ScheduleJob",
            "arguments": '{"cron":"*/5 * * * *","command":"<parameter>bad</parameter>"}',
        },
    }
    upstream_message = {"content": "", "tool_calls": [call_with_markup]}
    openai_resp = {
        "choices": [{"finish_reason": "tool_calls", "message": upstream_message}]
    }

    schedule_schema = {
        "type": "object",
        "required": ["cron", "command"],
        "properties": {
            "cron": {"type": "string"},
            "command": {"type": "string"},
        },
    }
    anthropic_body = {
        "tools": [{"name": "ScheduleJob", "input_schema": schedule_schema}]
    }

    verdict = proxy._classify_tool_response_issue(openai_resp, anthropic_body)
    self.assertEqual(verdict.kind, "malformed_payload")
    self.assertIn("malformed pseudo tool payload", verdict.reason)
613
+
614
def test_required_tool_turn_without_tool_call_is_flagged(self):
    # A plain-text "stop" reply with no tool calls, classified with
    # required_tool_choice=True, must be reported as "required_tool_miss".
    text_only_message = {"content": "Done.", "tool_calls": []}
    openai_resp = {
        "choices": [{"finish_reason": "stop", "message": text_only_message}]
    }
    edit_tool = {"name": "Edit", "input_schema": {"type": "object"}}
    anthropic_body = {"tools": [edit_tool]}

    verdict = proxy._classify_tool_response_issue(
        openai_resp,
        anthropic_body,
        required_tool_choice=True,
    )
    self.assertEqual(verdict.kind, "required_tool_miss")
634
+
635
def test_markup_repair_sanitizes_tool_arguments(self):
    # Stray </think> and </parameter> tags inside the argument string must be
    # gone after repair, and exactly one repair must be reported.
    dirty_call = {
        "id": "call_1",
        "function": {
            "name": "Bash",
            "arguments": '{"command":"echo ok </think> </parameter>"}',
        },
    }
    upstream_message = {"content": "", "tool_calls": [dirty_call]}
    openai_resp = {
        "choices": [{"finish_reason": "tool_calls", "message": upstream_message}]
    }

    repaired, repair_count = proxy._repair_tool_call_markup(openai_resp)
    self.assertEqual(repair_count, 1)

    repaired_call = repaired["choices"][0]["message"]["tool_calls"][0]
    cleaned_args = repaired_call["function"]["arguments"]
    self.assertNotIn("</think>", cleaned_args)
    self.assertNotIn("</parameter>", cleaned_args)
663
+
664
def test_markup_repair_recovers_json_after_tag_stripping(self):
    # The argument string starts with a stray </parameter> tag; after repair
    # it must parse as JSON and keep the original "command" value.
    prefixed_call = {
        "id": "call_1",
        "function": {
            "name": "Bash",
            "arguments": '</parameter>{"command":"ls"}',
        },
    }
    upstream_message = {"content": "", "tool_calls": [prefixed_call]}
    openai_resp = {
        "choices": [{"finish_reason": "tool_calls", "message": upstream_message}]
    }

    repaired, repair_count = proxy._repair_tool_call_markup(openai_resp)
    self.assertEqual(repair_count, 1)

    repaired_call = repaired["choices"][0]["message"]["tool_calls"][0]
    parsed_args = json.loads(repaired_call["function"]["arguments"])
    self.assertEqual(parsed_args["command"], "ls")
691
+
692
def test_guardrail_accepts_repaired_markup_without_fallback(self):
    # Runs the guardrail with PROXY_MALFORMED_TOOL_RETRY_MAX set to 0 and a
    # fake client that has no queued responses (any post() would raise), then
    # checks the returned tool call no longer contains </parameter> and that
    # the monitor counted exactly one preflight repair.
    saved_retry_max = proxy.PROXY_MALFORMED_TOOL_RETRY_MAX
    try:
        proxy.PROXY_MALFORMED_TOOL_RETRY_MAX = 0

        monitor = proxy.SessionMonitor(context_window=262144)

        dirty_call = {
            "id": "call_1",
            "function": {
                "name": "Bash",
                "arguments": '{"command":"ls </parameter>"}',
            },
        }
        openai_resp = {
            "choices": [
                {
                    "finish_reason": "tool_calls",
                    "message": {"content": "", "tool_calls": [dirty_call]},
                }
            ]
        }

        bash_schema = {
            "type": "object",
            "required": ["command"],
            "properties": {"command": {"type": "string", "minLength": 1}},
        }
        anthropic_body = {
            "tools": [{"name": "Bash", "input_schema": bash_schema}],
            "messages": [{"role": "user", "content": "run command"}],
        }
        openai_body = {
            "model": "test",
            "messages": [{"role": "user", "content": "run command"}],
            "tool_choice": "required",
        }

        guardrail_call = proxy._apply_malformed_tool_guardrail(
            _FakeClient([]),
            openai_resp,
            openai_body,
            anthropic_body,
            monitor,
            "session-repair",
        )
        result = asyncio.run(guardrail_call)

        self.assertTrue(result["choices"][0]["message"].get("tool_calls"))
        first_call = result["choices"][0]["message"]["tool_calls"][0]
        cleaned_args = first_call["function"]["arguments"]
        self.assertNotIn("</parameter>", cleaned_args)
        self.assertEqual(monitor.arg_preflight_repairs, 1)
    finally:
        # Always put the module-level setting back for later tests.
        proxy.PROXY_MALFORMED_TOOL_RETRY_MAX = saved_retry_max
757
+
758
def test_required_field_repair_fills_missing_required_values(self):
    # Only "cron" is supplied (as an empty string) while the schema requires
    # three non-empty strings; after repair every required field must hold a
    # non-blank value and one repair must be reported.
    required_fields = ("cron", "pattern", "subject")

    incomplete_call = {
        "id": "call_1",
        "function": {"name": "ScheduleJob", "arguments": '{"cron":""}'},
    }
    openai_resp = {
        "choices": [
            {
                "finish_reason": "tool_calls",
                "message": {"content": "", "tool_calls": [incomplete_call]},
            }
        ]
    }

    schedule_schema = {
        "type": "object",
        "required": list(required_fields),
        "properties": {
            field: {"type": "string", "minLength": 1} for field in required_fields
        },
    }
    anthropic_body = {
        "tools": [{"name": "ScheduleJob", "input_schema": schedule_schema}]
    }

    repaired, repair_count = proxy._repair_required_tool_args(
        openai_resp, anthropic_body
    )
    self.assertEqual(repair_count, 1)

    repaired_call = repaired["choices"][0]["message"]["tool_calls"][0]
    parsed_args = json.loads(repaired_call["function"]["arguments"])
    for field in required_fields:
        self.assertTrue(parsed_args[field].strip())
804
+
805
def test_guardrail_accepts_required_field_repair_without_fallback(self):
    # Runs the guardrail with PROXY_MALFORMED_TOOL_RETRY_MAX set to 0 and a
    # fake client that has no queued responses (any post() would raise), then
    # checks that every required argument comes back non-blank and that the
    # monitor counted exactly one preflight repair.
    required_fields = ("cron", "pattern", "subject")
    saved_retry_max = proxy.PROXY_MALFORMED_TOOL_RETRY_MAX
    try:
        proxy.PROXY_MALFORMED_TOOL_RETRY_MAX = 0

        monitor = proxy.SessionMonitor(context_window=262144)

        incomplete_call = {
            "id": "call_1",
            "function": {"name": "ScheduleJob", "arguments": '{"cron":""}'},
        }
        openai_resp = {
            "choices": [
                {
                    "finish_reason": "tool_calls",
                    "message": {"content": "", "tool_calls": [incomplete_call]},
                }
            ]
        }

        schedule_schema = {
            "type": "object",
            "required": list(required_fields),
            "properties": {
                field: {"type": "string", "minLength": 1}
                for field in required_fields
            },
        }
        anthropic_body = {
            "tools": [{"name": "ScheduleJob", "input_schema": schedule_schema}],
            "messages": [{"role": "user", "content": "schedule it"}],
        }
        openai_body = {
            "model": "test",
            "messages": [{"role": "user", "content": "schedule it"}],
            "tool_choice": "required",
        }

        guardrail_call = proxy._apply_malformed_tool_guardrail(
            _FakeClient([]),
            openai_resp,
            openai_body,
            anthropic_body,
            monitor,
            "session-repair-required",
        )
        result = asyncio.run(guardrail_call)

        first_call = result["choices"][0]["message"]["tool_calls"][0]
        parsed_args = json.loads(first_call["function"]["arguments"])
        for field in required_fields:
            self.assertTrue(parsed_args[field].strip())
        self.assertEqual(monitor.arg_preflight_repairs, 1)
    finally:
        # Always put the module-level setting back for later tests.
        proxy.PROXY_MALFORMED_TOOL_RETRY_MAX = saved_retry_max
875
+
876
def test_guardrail_retries_invalid_tool_args_and_recovers(self):
    # Feeds the guardrail a tool call with an empty required "cron" while one
    # retry is allowed and the fake client holds a single well-formed reply.
    # The final arguments must be non-blank and the monitor must have counted
    # at least one preflight repair or rejection. The retry payload is only
    # inspected when the fake client actually recorded a request.
    saved_retry_max = proxy.PROXY_MALFORMED_TOOL_RETRY_MAX
    try:
        proxy.PROXY_MALFORMED_TOOL_RETRY_MAX = 1

        monitor = proxy.SessionMonitor(context_window=262144)
        monitor.consecutive_forced_count = 7

        bad_call = {
            "id": "call_1",
            "function": {
                "name": "ScheduleJob",
                "arguments": '{"cron":"","command":"echo hi"}',
            },
        }
        initial_resp = {
            "choices": [
                {
                    "finish_reason": "tool_calls",
                    "message": {"content": "", "tool_calls": [bad_call]},
                }
            ]
        }

        good_call = {
            "id": "call_2",
            "function": {
                "name": "ScheduleJob",
                "arguments": '{"cron":"*/5 * * * *","command":"echo hi"}',
            },
        }
        repaired_resp = {
            "choices": [
                {
                    "finish_reason": "tool_calls",
                    "message": {"content": "", "tool_calls": [good_call]},
                }
            ]
        }

        queued_reply = _FakeResponse(repaired_resp)
        fake_client = _FakeClient([queued_reply])

        openai_body = {
            "model": "test",
            "messages": [{"role": "user", "content": "schedule this job"}],
            "tool_choice": "required",
        }
        schedule_schema = {
            "type": "object",
            "required": ["cron", "command"],
            "properties": {
                "cron": {"type": "string", "minLength": 1},
                "command": {"type": "string", "minLength": 1},
            },
        }
        anthropic_body = {
            "tools": [{"name": "ScheduleJob", "input_schema": schedule_schema}],
            "messages": [{"role": "user", "content": "schedule this job"}],
        }

        guardrail_call = proxy._apply_malformed_tool_guardrail(
            fake_client,
            initial_resp,
            openai_body,
            anthropic_body,
            monitor,
            "session-test",
        )
        result = asyncio.run(guardrail_call)

        first_call = result["choices"][0]["message"]["tool_calls"][0]
        parsed_args = json.loads(first_call["function"]["arguments"])
        self.assertTrue(parsed_args["cron"].strip())
        self.assertTrue(parsed_args["command"].strip())

        preflight_counted = (
            monitor.arg_preflight_repairs >= 1
            or monitor.arg_preflight_rejections >= 1
        )
        self.assertTrue(preflight_counted)

        recorded = fake_client.requests
        if recorded:
            retry_payload = recorded[0]["kwargs"]["json"]
            last_message_text = retry_payload["messages"][-1]["content"]
            self.assertIn("TOOL CALL REPAIR", last_message_text)
    finally:
        # Always put the module-level setting back for later tests.
        proxy.PROXY_MALFORMED_TOOL_RETRY_MAX = saved_retry_max
974
+
445
975
 
446
976
  class TestToolTurnControls(unittest.TestCase):
447
977
  def test_tool_narrowing_reduces_tool_count(self):
@@ -518,6 +1048,89 @@ class TestToolTurnControls(unittest.TestCase):
518
1048
  finally:
519
1049
  setattr(proxy, "PROXY_DISABLE_THINKING_ON_TOOL_TURNS", old_disable)
520
1050
 
1051
def test_forced_tool_dampener_temporarily_releases_required(self):
    # With the dampener enabled and the monitor primed (3 forced turns, an
    # invalid-tool-call streak of 1), activation must succeed and set a 2-turn
    # cooldown. Building the next request must then use tool_choice "auto"
    # and leave 1 cooldown turn.
    overrides = {
        "PROXY_FORCED_TOOL_DAMPENER": True,
        "PROXY_FORCED_TOOL_DAMPENER_MIN_FORCED": 3,
        "PROXY_FORCED_TOOL_DAMPENER_BAD_STREAK": 1,
        "PROXY_FORCED_TOOL_DAMPENER_EMPTY_STREAK": 1,
        "PROXY_FORCED_TOOL_DAMPENER_REJECTIONS": 2,
        "PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS": 2,
    }
    # Snapshot the current settings (same order) so they can be restored.
    saved = {name: getattr(proxy, name) for name in overrides}
    try:
        for name, value in overrides.items():
            setattr(proxy, name, value)

        monitor = proxy.SessionMonitor(context_window=262144)
        monitor.consecutive_forced_count = 3
        monitor.invalid_tool_call_streak = 1

        activated = monitor.maybe_activate_forced_tool_dampener("invalid_tool_args")
        self.assertTrue(activated)
        self.assertEqual(monitor.forced_auto_cooldown_turns, 2)

        assistant_turn = {
            "role": "assistant",
            "content": [{"type": "text", "text": "I will continue."}],
        }
        read_tool = {
            "name": "Read",
            "description": "Read file",
            "input_schema": {"type": "object"},
        }
        body = {
            "model": "test",
            "messages": [assistant_turn, {"role": "user", "content": "keep going"}],
            "tools": [read_tool],
        }

        openai_request = proxy.build_openai_request(body, monitor)
        self.assertEqual(openai_request.get("tool_choice"), "auto")
        self.assertEqual(monitor.forced_auto_cooldown_turns, 1)
    finally:
        for name, value in saved.items():
            setattr(proxy, name, value)
1102
+
1103
def test_forced_tool_dampener_uses_rejection_pressure(self):
    # Streak thresholds are set to 5 while the monitor's streak counters are
    # left untouched; only the rejection counter is set, to the configured
    # threshold of 2. Activation must succeed, set a 1-turn cooldown, and
    # zero the rejection counter.
    overrides = {
        "PROXY_FORCED_TOOL_DAMPENER": True,
        "PROXY_FORCED_TOOL_DAMPENER_MIN_FORCED": 3,
        "PROXY_FORCED_TOOL_DAMPENER_BAD_STREAK": 5,
        "PROXY_FORCED_TOOL_DAMPENER_EMPTY_STREAK": 5,
        "PROXY_FORCED_TOOL_DAMPENER_REJECTIONS": 2,
        "PROXY_FORCED_TOOL_DAMPENER_AUTO_TURNS": 1,
    }
    # Snapshot the current settings (same order) so they can be restored.
    saved = {name: getattr(proxy, name) for name in overrides}
    try:
        for name, value in overrides.items():
            setattr(proxy, name, value)

        monitor = proxy.SessionMonitor(context_window=262144)
        monitor.consecutive_forced_count = 3
        monitor.arg_preflight_rejections = 2

        activated = monitor.maybe_activate_forced_tool_dampener("invalid_tool_args")
        self.assertTrue(activated)
        self.assertEqual(monitor.forced_auto_cooldown_turns, 1)
        self.assertEqual(monitor.arg_preflight_rejections, 0)
    finally:
        for name, value in saved.items():
            setattr(proxy, name, value)
1133
+
521
1134
  def test_no_tools_does_not_inject_agentic_system_message(self):
522
1135
  body = {
523
1136
  "model": "test",
@@ -631,6 +1244,54 @@ class TestSessionContaminationBreaker(unittest.TestCase):
631
1244
  setattr(proxy, "PROXY_SESSION_CONTAMINATION_THRESHOLD", old_threshold)
632
1245
  setattr(proxy, "PROXY_SESSION_CONTAMINATION_KEEP_LAST", old_keep)
633
1246
 
1247
def test_contamination_breaker_triggers_on_forced_invalid_combo(self):
    # The monitor has an invalid-tool-call streak of 2 and 6 consecutive
    # forced turns, against a forced threshold of 5. The breaker is expected
    # to count one reset, clear the streak, return 5 messages, and place a
    # "SESSION RESET" notice at index 1.
    overrides = {
        "PROXY_SESSION_CONTAMINATION_BREAKER": True,
        "PROXY_SESSION_CONTAMINATION_THRESHOLD": 3,
        "PROXY_SESSION_CONTAMINATION_KEEP_LAST": 3,
        "PROXY_SESSION_CONTAMINATION_FORCED_THRESHOLD": 5,
        "PROXY_SESSION_CONTAMINATION_REQUIRED_MISS_THRESHOLD": 4,
    }
    # Snapshot the current settings (same order) so they can be restored.
    saved = {name: getattr(proxy, name) for name in overrides}
    try:
        for name, value in overrides.items():
            setattr(proxy, name, value)

        monitor = proxy.SessionMonitor(context_window=262144)
        monitor.invalid_tool_call_streak = 2
        monitor.consecutive_forced_count = 6

        transcript = (
            ("user", "start"),
            ("assistant", "a1"),
            ("user", "u2"),
            ("assistant", "a3"),
            ("user", "u4"),
            ("assistant", "a5"),
        )
        body = {
            "messages": [
                {"role": role, "content": content} for role, content in transcript
            ]
        }

        updated = proxy._maybe_apply_session_contamination_breaker(
            body, monitor, "session-test"
        )

        self.assertEqual(monitor.contamination_resets, 1)
        self.assertEqual(monitor.invalid_tool_call_streak, 0)
        self.assertEqual(len(updated["messages"]), 5)
        self.assertIn("SESSION RESET", updated["messages"][1]["content"])
    finally:
        for name, value in saved.items():
            setattr(proxy, name, value)
1294
+
634
1295
 
635
1296
# Script entry point: hand control to unittest's command-line runner when this
# file is executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()