@miller-tech/uap 1.15.9 → 1.15.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -2479,16 +2479,14 @@ def _classify_tool_response_issue(
|
|
|
2479
2479
|
has_tool_calls = _openai_has_tool_calls(openai_resp)
|
|
2480
2480
|
if not has_tool_calls:
|
|
2481
2481
|
if required_tool_choice:
|
|
2482
|
-
|
|
2483
|
-
|
|
2484
|
-
|
|
2485
|
-
|
|
2486
|
-
|
|
2487
|
-
|
|
2488
|
-
|
|
2489
|
-
|
|
2490
|
-
),
|
|
2491
|
-
)
|
|
2482
|
+
return ToolResponseIssue(
|
|
2483
|
+
kind="required_tool_miss",
|
|
2484
|
+
reason="required tool turn returned no tool calls",
|
|
2485
|
+
retry_hint=(
|
|
2486
|
+
"A tool call is mandatory for this turn. Emit exactly one valid tool call now "
|
|
2487
|
+
"with a strict JSON object in `arguments`."
|
|
2488
|
+
),
|
|
2489
|
+
)
|
|
2492
2490
|
return ToolResponseIssue()
|
|
2493
2491
|
|
|
2494
2492
|
if not PROXY_TOOL_ARGS_PREFLIGHT:
|
|
@@ -2566,6 +2564,49 @@ def _looks_malformed_tool_payload(text: str) -> bool:
|
|
|
2566
2564
|
return True
|
|
2567
2565
|
if lowered.count("</parameter") >= 1 and lowered.count('{"description"') >= 1:
|
|
2568
2566
|
return True
|
|
2567
|
+
if _looks_repetitive_policy_echo(text):
|
|
2568
|
+
return True
|
|
2569
|
+
return False
|
|
2570
|
+
|
|
2571
|
+
|
|
2572
|
+
def _looks_repetitive_policy_echo(text: str) -> bool:
|
|
2573
|
+
if not text:
|
|
2574
|
+
return False
|
|
2575
|
+
|
|
2576
|
+
lowered = text.lower()
|
|
2577
|
+
compact = re.sub(r"\s+", " ", lowered).strip()
|
|
2578
|
+
if not compact:
|
|
2579
|
+
return False
|
|
2580
|
+
|
|
2581
|
+
policy_phrase_markers = (
|
|
2582
|
+
"at least 2 new test cases",
|
|
2583
|
+
"tests must be in test/",
|
|
2584
|
+
"describe/it/expect using vitest",
|
|
2585
|
+
)
|
|
2586
|
+
if any(compact.count(marker) >= 4 for marker in policy_phrase_markers):
|
|
2587
|
+
return True
|
|
2588
|
+
|
|
2589
|
+
lines = [
|
|
2590
|
+
re.sub(r"\s+", " ", line.strip().lower())
|
|
2591
|
+
for line in text.splitlines()
|
|
2592
|
+
if line.strip()
|
|
2593
|
+
]
|
|
2594
|
+
if lines:
|
|
2595
|
+
line_counts: dict[str, int] = {}
|
|
2596
|
+
for line in lines:
|
|
2597
|
+
if len(line) < 24:
|
|
2598
|
+
continue
|
|
2599
|
+
line_counts[line] = line_counts.get(line, 0) + 1
|
|
2600
|
+
if line_counts and max(line_counts.values()) >= 8:
|
|
2601
|
+
return True
|
|
2602
|
+
|
|
2603
|
+
repeated_phrase_match = re.search(
|
|
2604
|
+
r"((?:[a-z0-9_./-]+\s+){2,8}[a-z0-9_./-]+)(?:\s+\1){7,}",
|
|
2605
|
+
compact,
|
|
2606
|
+
)
|
|
2607
|
+
if repeated_phrase_match:
|
|
2608
|
+
return True
|
|
2609
|
+
|
|
2569
2610
|
return False
|
|
2570
2611
|
|
|
2571
2612
|
|
|
@@ -805,6 +805,54 @@ class TestMalformedToolGuardrail(unittest.TestCase):
|
|
|
805
805
|
)
|
|
806
806
|
self.assertEqual(issue.kind, "required_tool_miss")
|
|
807
807
|
|
|
808
|
+
def test_required_tool_turn_with_long_text_without_tool_call_is_flagged(self):
|
|
809
|
+
openai_resp = {
|
|
810
|
+
"choices": [
|
|
811
|
+
{
|
|
812
|
+
"finish_reason": "stop",
|
|
813
|
+
"message": {
|
|
814
|
+
"content": (
|
|
815
|
+
"I reviewed the repository and here is a long explanation that still "
|
|
816
|
+
"does not include any valid tool call payload for this required turn."
|
|
817
|
+
),
|
|
818
|
+
"tool_calls": [],
|
|
819
|
+
},
|
|
820
|
+
}
|
|
821
|
+
]
|
|
822
|
+
}
|
|
823
|
+
anthropic_body = {
|
|
824
|
+
"tools": [{"name": "Edit", "input_schema": {"type": "object"}}],
|
|
825
|
+
}
|
|
826
|
+
|
|
827
|
+
issue = proxy._classify_tool_response_issue(
|
|
828
|
+
openai_resp, anthropic_body, required_tool_choice=True
|
|
829
|
+
)
|
|
830
|
+
self.assertEqual(issue.kind, "required_tool_miss")
|
|
831
|
+
|
|
832
|
+
def test_preflight_flags_repetitive_policy_echo_without_tool_call(self):
|
|
833
|
+
repeated = " (describe/it/expect using vitest" * 24
|
|
834
|
+
openai_resp = {
|
|
835
|
+
"choices": [
|
|
836
|
+
{
|
|
837
|
+
"finish_reason": "stop",
|
|
838
|
+
"message": {
|
|
839
|
+
"content": (
|
|
840
|
+
"- At least 2 new test cases before claiming done. "
|
|
841
|
+
"- Tests must be in test/ following existing patterns."
|
|
842
|
+
f"{repeated}"
|
|
843
|
+
),
|
|
844
|
+
"tool_calls": [],
|
|
845
|
+
},
|
|
846
|
+
}
|
|
847
|
+
]
|
|
848
|
+
}
|
|
849
|
+
anthropic_body = {
|
|
850
|
+
"tools": [{"name": "Read", "input_schema": {"type": "object"}}],
|
|
851
|
+
}
|
|
852
|
+
|
|
853
|
+
issue = proxy._classify_tool_response_issue(openai_resp, anthropic_body)
|
|
854
|
+
self.assertEqual(issue.kind, "malformed_payload")
|
|
855
|
+
|
|
808
856
|
def test_markup_repair_sanitizes_tool_arguments(self):
|
|
809
857
|
openai_resp = {
|
|
810
858
|
"choices": [
|