@miller-tech/uap 1.20.12 → 1.20.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -143,7 +143,7 @@ PROXY_TOOL_STATE_MIN_MESSAGES = int(
|
|
|
143
143
|
os.environ.get("PROXY_TOOL_STATE_MIN_MESSAGES", "6")
|
|
144
144
|
)
|
|
145
145
|
PROXY_TOOL_STATE_FORCED_BUDGET = int(
|
|
146
|
-
os.environ.get("PROXY_TOOL_STATE_FORCED_BUDGET", "24")
|
|
146
|
+
os.environ.get("PROXY_TOOL_STATE_FORCED_BUDGET", "12")
|
|
147
147
|
)
|
|
148
148
|
PROXY_TOOL_STATE_AUTO_BUDGET = int(os.environ.get("PROXY_TOOL_STATE_AUTO_BUDGET", "2"))
|
|
149
149
|
PROXY_TOOL_STATE_STAGNATION_THRESHOLD = int(
|
|
@@ -156,7 +156,7 @@ PROXY_TOOL_STATE_FINALIZE_THRESHOLD = int(
|
|
|
156
156
|
os.environ.get("PROXY_TOOL_STATE_FINALIZE_THRESHOLD", "18")
|
|
157
157
|
)
|
|
158
158
|
PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT = int(
|
|
159
|
-
os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "2")
|
|
159
|
+
os.environ.get("PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT", "1")
|
|
160
160
|
)
|
|
161
161
|
PROXY_CLIENT_RATE_WINDOW_SECS = int(
|
|
162
162
|
os.environ.get("PROXY_CLIENT_RATE_WINDOW_SECS", "60")
|
|
@@ -628,6 +628,7 @@ class SessionMonitor:
|
|
|
628
628
|
tool_state_transitions: int = 0
|
|
629
629
|
tool_state_review_cycles: int = 0
|
|
630
630
|
last_tool_fingerprint: str = ""
|
|
631
|
+
cycling_tool_names: list = field(default_factory=list)
|
|
631
632
|
finalize_turn_active: bool = False
|
|
632
633
|
completion_required: bool = False
|
|
633
634
|
completion_pending: bool = False
|
|
@@ -832,6 +833,7 @@ class SessionMonitor:
|
|
|
832
833
|
self.tool_state_auto_budget_remaining = 0
|
|
833
834
|
self.tool_state_stagnation_streak = 0
|
|
834
835
|
self.tool_state_review_cycles = 0
|
|
836
|
+
self.cycling_tool_names = []
|
|
835
837
|
self.last_tool_fingerprint = ""
|
|
836
838
|
|
|
837
839
|
def update_completion_state(self, anthropic_body: dict, has_tool_results: bool):
|
|
@@ -2053,12 +2055,17 @@ def _resolve_state_machine_tool_choice(
|
|
|
2053
2055
|
monitor.tool_state_forced_budget_remaining = max(
|
|
2054
2056
|
1, PROXY_TOOL_STATE_FORCED_BUDGET // 2
|
|
2055
2057
|
)
|
|
2058
|
+
# Capture which tools are cycling for narrowing/hint injection
|
|
2059
|
+
window = max(2, PROXY_TOOL_STATE_CYCLE_WINDOW)
|
|
2060
|
+
recent = [fp for fp in monitor.tool_call_history[-window:] if fp]
|
|
2061
|
+
monitor.cycling_tool_names = list(dict.fromkeys(recent))
|
|
2056
2062
|
logger.warning(
|
|
2057
|
-
"TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d)",
|
|
2063
|
+
"TOOL STATE MACHINE: entering review (cycle=%s repeat=%d stagnation=%d cycles=%d cycling_tools=%s)",
|
|
2058
2064
|
cycle_looping,
|
|
2059
2065
|
cycle_repeat,
|
|
2060
2066
|
monitor.tool_state_stagnation_streak,
|
|
2061
2067
|
monitor.tool_state_review_cycles,
|
|
2068
|
+
monitor.cycling_tool_names,
|
|
2062
2069
|
)
|
|
2063
2070
|
return "required", reason
|
|
2064
2071
|
|
|
@@ -2349,6 +2356,49 @@ def build_openai_request(
|
|
|
2349
2356
|
monitor.no_progress_streak = (
|
|
2350
2357
|
0 if last_user_has_tool_result else monitor.no_progress_streak + 1
|
|
2351
2358
|
)
|
|
2359
|
+
# Option 1: Inject cycle-break instruction when entering review
|
|
2360
|
+
if (
|
|
2361
|
+
monitor.tool_turn_phase == "review"
|
|
2362
|
+
and state_reason in {"cycle_detected", "stagnation"}
|
|
2363
|
+
and monitor.cycling_tool_names
|
|
2364
|
+
):
|
|
2365
|
+
cycling_names = ", ".join(monitor.cycling_tool_names)
|
|
2366
|
+
cycle_hint = (
|
|
2367
|
+
f"You have been repeatedly calling the same tool(s): {cycling_names}. "
|
|
2368
|
+
"This is not making progress. Use a DIFFERENT tool to advance the task, "
|
|
2369
|
+
"or call a tool that produces your final answer."
|
|
2370
|
+
)
|
|
2371
|
+
messages = openai_body.get("messages", [])
|
|
2372
|
+
messages.append({"role": "user", "content": cycle_hint})
|
|
2373
|
+
openai_body["messages"] = messages
|
|
2374
|
+
logger.warning(
|
|
2375
|
+
"CYCLE BREAK: injected hint about cycling tools: %s",
|
|
2376
|
+
cycling_names,
|
|
2377
|
+
)
|
|
2378
|
+
# Option 2: Narrow tools during review to exclude cycling tools
|
|
2379
|
+
if (
|
|
2380
|
+
monitor.tool_turn_phase == "review"
|
|
2381
|
+
and monitor.cycling_tool_names
|
|
2382
|
+
and "tools" in openai_body
|
|
2383
|
+
):
|
|
2384
|
+
original_count = len(openai_body["tools"])
|
|
2385
|
+
narrowed = [
|
|
2386
|
+
t
|
|
2387
|
+
for t in openai_body["tools"]
|
|
2388
|
+
if t.get("function", {}).get("name") not in monitor.cycling_tool_names
|
|
2389
|
+
]
|
|
2390
|
+
if narrowed:
|
|
2391
|
+
openai_body["tools"] = narrowed
|
|
2392
|
+
logger.warning(
|
|
2393
|
+
"CYCLE BREAK: narrowed tools from %d to %d (excluded %s)",
|
|
2394
|
+
original_count,
|
|
2395
|
+
len(narrowed),
|
|
2396
|
+
monitor.cycling_tool_names,
|
|
2397
|
+
)
|
|
2398
|
+
else:
|
|
2399
|
+
logger.warning(
|
|
2400
|
+
"CYCLE BREAK: cannot narrow tools — all tools are cycling, keeping original set",
|
|
2401
|
+
)
|
|
2352
2402
|
logger.info(
|
|
2353
2403
|
"tool_choice forced to 'required' by TOOL STATE MACHINE (phase=%s reason=%s forced_budget=%d)",
|
|
2354
2404
|
monitor.tool_turn_phase,
|
|
@@ -3236,6 +3236,147 @@ class TestPruningImprovements(unittest.TestCase):
|
|
|
3236
3236
|
self.assertEqual(monitor.tool_turn_phase, "finalize")
|
|
3237
3237
|
|
|
3238
3238
|
|
|
3239
|
+
class TestCycleBreakOptions(unittest.TestCase):
|
|
3240
|
+
"""Tests for cycle-break options: hint injection, tool narrowing, reduced budgets."""
|
|
3241
|
+
|
|
3242
|
+
def test_cycle_break_injects_hint_message(self):
|
|
3243
|
+
"""Option 1: cycle detection injects a user hint about the cycling tools."""
|
|
3244
|
+
old_state = getattr(proxy, "PROXY_TOOL_STATE_MACHINE")
|
|
3245
|
+
old_min_msgs = getattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES")
|
|
3246
|
+
old_forced = getattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET")
|
|
3247
|
+
old_auto = getattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET")
|
|
3248
|
+
old_stagnation = getattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD")
|
|
3249
|
+
old_cycle_window = getattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW")
|
|
3250
|
+
try:
|
|
3251
|
+
setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
|
|
3252
|
+
setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
|
|
3253
|
+
setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 20)
|
|
3254
|
+
setattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET", 2)
|
|
3255
|
+
setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 99)
|
|
3256
|
+
setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 4)
|
|
3257
|
+
|
|
3258
|
+
monitor = proxy.SessionMonitor(context_window=262144)
|
|
3259
|
+
monitor.tool_turn_phase = "act"
|
|
3260
|
+
monitor.tool_state_forced_budget_remaining = 20
|
|
3261
|
+
monitor.tool_call_history = ["Bash", "Bash", "Bash", "Bash"]
|
|
3262
|
+
monitor.last_tool_fingerprint = "Bash"
|
|
3263
|
+
|
|
3264
|
+
body = {
|
|
3265
|
+
"model": "test",
|
|
3266
|
+
"messages": [
|
|
3267
|
+
{"role": "user", "content": "start"},
|
|
3268
|
+
{
|
|
3269
|
+
"role": "assistant",
|
|
3270
|
+
"content": [
|
|
3271
|
+
{"type": "tool_use", "id": "t1", "name": "Bash", "input": {"command": "ls"}},
|
|
3272
|
+
],
|
|
3273
|
+
},
|
|
3274
|
+
{
|
|
3275
|
+
"role": "user",
|
|
3276
|
+
"content": [
|
|
3277
|
+
{"type": "tool_result", "tool_use_id": "t1", "content": "ok"},
|
|
3278
|
+
],
|
|
3279
|
+
},
|
|
3280
|
+
],
|
|
3281
|
+
"tools": [
|
|
3282
|
+
{"name": "Bash", "description": "Run command", "input_schema": {"type": "object"}},
|
|
3283
|
+
{"name": "Read", "description": "Read file", "input_schema": {"type": "object"}},
|
|
3284
|
+
],
|
|
3285
|
+
}
|
|
3286
|
+
|
|
3287
|
+
openai = proxy.build_openai_request(body, monitor)
|
|
3288
|
+
self.assertEqual(monitor.tool_turn_phase, "review")
|
|
3289
|
+
# Check that a cycle-break hint was injected
|
|
3290
|
+
messages = openai.get("messages", [])
|
|
3291
|
+
last_msg = messages[-1] if messages else {}
|
|
3292
|
+
self.assertEqual(last_msg.get("role"), "user")
|
|
3293
|
+
self.assertIn("Bash", last_msg.get("content", ""))
|
|
3294
|
+
self.assertIn("DIFFERENT tool", last_msg.get("content", ""))
|
|
3295
|
+
finally:
|
|
3296
|
+
setattr(proxy, "PROXY_TOOL_STATE_MACHINE", old_state)
|
|
3297
|
+
setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", old_min_msgs)
|
|
3298
|
+
setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", old_forced)
|
|
3299
|
+
setattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET", old_auto)
|
|
3300
|
+
setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", old_stagnation)
|
|
3301
|
+
setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", old_cycle_window)
|
|
3302
|
+
|
|
3303
|
+
def test_cycle_break_narrows_tools(self):
|
|
3304
|
+
"""Option 2: cycling tools are excluded from the tools array during review."""
|
|
3305
|
+
old_state = getattr(proxy, "PROXY_TOOL_STATE_MACHINE")
|
|
3306
|
+
old_min_msgs = getattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES")
|
|
3307
|
+
old_forced = getattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET")
|
|
3308
|
+
old_auto = getattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET")
|
|
3309
|
+
old_stagnation = getattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD")
|
|
3310
|
+
old_cycle_window = getattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW")
|
|
3311
|
+
try:
|
|
3312
|
+
setattr(proxy, "PROXY_TOOL_STATE_MACHINE", True)
|
|
3313
|
+
setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", 3)
|
|
3314
|
+
setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", 20)
|
|
3315
|
+
setattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET", 2)
|
|
3316
|
+
setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", 99)
|
|
3317
|
+
setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", 4)
|
|
3318
|
+
|
|
3319
|
+
monitor = proxy.SessionMonitor(context_window=262144)
|
|
3320
|
+
monitor.tool_turn_phase = "act"
|
|
3321
|
+
monitor.tool_state_forced_budget_remaining = 20
|
|
3322
|
+
monitor.tool_call_history = ["Bash", "Bash", "Bash", "Bash"]
|
|
3323
|
+
monitor.last_tool_fingerprint = "Bash"
|
|
3324
|
+
|
|
3325
|
+
body = {
|
|
3326
|
+
"model": "test",
|
|
3327
|
+
"messages": [
|
|
3328
|
+
{"role": "user", "content": "start"},
|
|
3329
|
+
{
|
|
3330
|
+
"role": "assistant",
|
|
3331
|
+
"content": [
|
|
3332
|
+
{"type": "tool_use", "id": "t1", "name": "Bash", "input": {"command": "ls"}},
|
|
3333
|
+
],
|
|
3334
|
+
},
|
|
3335
|
+
{
|
|
3336
|
+
"role": "user",
|
|
3337
|
+
"content": [
|
|
3338
|
+
{"type": "tool_result", "tool_use_id": "t1", "content": "ok"},
|
|
3339
|
+
],
|
|
3340
|
+
},
|
|
3341
|
+
],
|
|
3342
|
+
"tools": [
|
|
3343
|
+
{"name": "Bash", "description": "Run command", "input_schema": {"type": "object"}},
|
|
3344
|
+
{"name": "Read", "description": "Read file", "input_schema": {"type": "object"}},
|
|
3345
|
+
{"name": "Write", "description": "Write file", "input_schema": {"type": "object"}},
|
|
3346
|
+
],
|
|
3347
|
+
}
|
|
3348
|
+
|
|
3349
|
+
openai = proxy.build_openai_request(body, monitor)
|
|
3350
|
+
self.assertEqual(monitor.tool_turn_phase, "review")
|
|
3351
|
+
# Bash should be excluded, Read and Write should remain
|
|
3352
|
+
tool_names = [t["function"]["name"] for t in openai.get("tools", [])]
|
|
3353
|
+
self.assertNotIn("Bash", tool_names)
|
|
3354
|
+
self.assertIn("Read", tool_names)
|
|
3355
|
+
self.assertIn("Write", tool_names)
|
|
3356
|
+
finally:
|
|
3357
|
+
setattr(proxy, "PROXY_TOOL_STATE_MACHINE", old_state)
|
|
3358
|
+
setattr(proxy, "PROXY_TOOL_STATE_MIN_MESSAGES", old_min_msgs)
|
|
3359
|
+
setattr(proxy, "PROXY_TOOL_STATE_FORCED_BUDGET", old_forced)
|
|
3360
|
+
setattr(proxy, "PROXY_TOOL_STATE_AUTO_BUDGET", old_auto)
|
|
3361
|
+
setattr(proxy, "PROXY_TOOL_STATE_STAGNATION_THRESHOLD", old_stagnation)
|
|
3362
|
+
setattr(proxy, "PROXY_TOOL_STATE_CYCLE_WINDOW", old_cycle_window)
|
|
3363
|
+
|
|
3364
|
+
def test_forced_budget_default_is_12(self):
|
|
3365
|
+
"""Option 3: default forced budget reduced from 24 to 12."""
|
|
3366
|
+
self.assertEqual(proxy.PROXY_TOOL_STATE_FORCED_BUDGET, 12)
|
|
3367
|
+
|
|
3368
|
+
def test_review_cycle_limit_default_is_1(self):
|
|
3369
|
+
"""Option 4: default review cycle limit reduced from 2 to 1."""
|
|
3370
|
+
self.assertEqual(proxy.PROXY_TOOL_STATE_REVIEW_CYCLE_LIMIT, 1)
|
|
3371
|
+
|
|
3372
|
+
def test_cycling_tool_names_cleared_on_reset(self):
|
|
3373
|
+
"""cycling_tool_names is cleared when tool turn state resets."""
|
|
3374
|
+
monitor = proxy.SessionMonitor(context_window=262144)
|
|
3375
|
+
monitor.cycling_tool_names = ["Bash", "Read"]
|
|
3376
|
+
monitor.reset_tool_turn_state(reason="test")
|
|
3377
|
+
self.assertEqual(monitor.cycling_tool_names, [])
|
|
3378
|
+
|
|
3379
|
+
|
|
3239
3380
|
if __name__ == "__main__":
|
|
3240
3381
|
unittest.main()
|
|
3241
3382
|
|