@miller-tech/uap 1.34.0 → 1.36.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/.tsbuildinfo +1 -1
- package/dist/cli/tool-calls.js +4 -4
- package/dist/cli/tool-calls.js.map +1 -1
- package/dist/delivery/index.d.ts +1 -1
- package/dist/delivery/index.d.ts.map +1 -1
- package/dist/delivery/index.js +1 -1
- package/dist/delivery/index.js.map +1 -1
- package/dist/delivery/spec-imports.d.ts +30 -6
- package/dist/delivery/spec-imports.d.ts.map +1 -1
- package/dist/delivery/spec-imports.js +213 -11
- package/dist/delivery/spec-imports.js.map +1 -1
- package/package.json +1 -1
- package/tools/agents/config/thinking.gbnf +25 -0
- package/tools/agents/scripts/anthropic_proxy.py +60 -0
- package/tools/agents/scripts/qwen_tool_call_wrapper.py +1 -1
- package/tools/agents/scripts/tool_call_wrapper.py +1 -1
|
@@ -443,6 +443,23 @@ PROXY_TOOL_CALL_GRAMMAR_PATH = os.path.abspath(
|
|
|
443
443
|
os.path.join(os.path.dirname(__file__), "..", "config", "tool-call.gbnf"),
|
|
444
444
|
)
|
|
445
445
|
)
|
|
446
|
+
# Structured thinking grammar — forces a compact <think> header on non-tool
# reasoning turns so downstream verifiers can parse the model's framing.
# Default off (opt-in) because it changes output shape.
#
# The raw value is stripped and lower-cased before it is compared, and an
# empty value counts as "off": a blank `PROXY_THINKING_GRAMMAR=` entry or a
# whitespace-padded "off" must not silently switch on an opt-in feature.
PROXY_THINKING_GRAMMAR = os.environ.get(
    "PROXY_THINKING_GRAMMAR", "off"
).strip().lower() not in {
    "",
    "0",
    "false",
    "off",
    "no",
}
# Absolute path of the GBNF file. PROXY_THINKING_GRAMMAR_PATH overrides the
# default of ../config/thinking.gbnf resolved relative to this script.
PROXY_THINKING_GRAMMAR_PATH = os.path.abspath(
    os.environ.get(
        "PROXY_THINKING_GRAMMAR_PATH",
        os.path.join(os.path.dirname(__file__), "..", "config", "thinking.gbnf"),
    )
)
|
|
446
463
|
PROXY_MODEL_PROFILE_HEADER = os.environ.get(
|
|
447
464
|
"PROXY_MODEL_PROFILE_HEADER", "x-uap-model-profile"
|
|
448
465
|
)
|
|
@@ -534,6 +551,41 @@ def _load_tool_call_grammar(path: str) -> str:
|
|
|
534
551
|
TOOL_CALL_GBNF = _load_tool_call_grammar(PROXY_TOOL_CALL_GRAMMAR_PATH)
|
|
535
552
|
TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE = True
|
|
536
553
|
|
|
554
|
+
|
|
555
|
+
def _load_thinking_grammar(path: str) -> str:
    """Read the structured-thinking GBNF grammar text from *path*.

    Args:
        path: Filesystem location of the grammar file, read as UTF-8.

    Returns:
        The file contents with leading/trailing whitespace stripped, or an
        empty string when PROXY_THINKING_GRAMMAR is off or the file cannot
        be read or decoded. A failed read is logged as a warning and never
        raised, so a bad grammar file cannot abort module import.
    """
    if not PROXY_THINKING_GRAMMAR:
        return ""

    try:
        with open(path, "r", encoding="utf-8") as fh:
            return fh.read().strip()
    except (OSError, ValueError) as exc:
        # ValueError covers UnicodeDecodeError (file is not valid UTF-8) and
        # paths with embedded NUL bytes; neither is an OSError, and both
        # should disable the feature rather than crash at load time.
        logger.warning(
            "Thinking grammar disabled: failed to read %s (%s)",
            path,
            exc,
        )
        return ""
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
# Grammar text loaded once at module import; an empty string means the
# feature is off or the grammar file could not be read.
THINKING_GBNF = _load_thinking_grammar(PROXY_THINKING_GRAMMAR_PATH)
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
def _apply_thinking_grammar(request_body: dict) -> None:
    """Attach the structured-thinking GBNF grammar to a non-tool request.

    Mutates ``request_body`` in place by setting its ``"grammar"`` key to
    THINKING_GBNF. The request is left untouched when any of these hold:

    * PROXY_THINKING_GRAMMAR is off or THINKING_GBNF is empty;
    * the request carries a non-empty ``"tools"`` entry;
    * the request already has a non-empty ``"grammar"`` entry, so a grammar
      set earlier (e.g. the tool-call grammar) is never overwritten.
    """
    if not (PROXY_THINKING_GRAMMAR and THINKING_GBNF):
        return
    # Tool turns and requests that already carry a grammar keep what they have.
    if request_body.get("tools") or request_body.get("grammar"):
        return
    request_body["grammar"] = THINKING_GBNF
|
|
588
|
+
|
|
537
589
|
def _resolve_passthrough_models() -> list[str]:
|
|
538
590
|
raw = ANTHROPIC_PASSTHROUGH_MODELS.strip()
|
|
539
591
|
if not raw:
|
|
@@ -2079,6 +2131,12 @@ async def lifespan(app: FastAPI):
|
|
|
2079
2131
|
TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE,
|
|
2080
2132
|
PROXY_TOOL_CALL_GRAMMAR_PATH,
|
|
2081
2133
|
)
|
|
2134
|
+
logger.info(
|
|
2135
|
+
"Thinking grammar: enabled=%s loaded=%s path=%s",
|
|
2136
|
+
PROXY_THINKING_GRAMMAR,
|
|
2137
|
+
bool(THINKING_GBNF),
|
|
2138
|
+
PROXY_THINKING_GRAMMAR_PATH,
|
|
2139
|
+
)
|
|
2082
2140
|
logger.info(
|
|
2083
2141
|
"Timeouts: read=%ds generation=%ds slot_hang=%ds",
|
|
2084
2142
|
int(PROXY_READ_TIMEOUT),
|
|
@@ -3910,6 +3968,8 @@ def build_openai_request(
|
|
|
3910
3968
|
# pre-narrowing toolset so it can restore a dropped write tool.
|
|
3911
3969
|
_maybe_inject_recon_convergence(openai_body, monitor, full_openai_tools)
|
|
3912
3970
|
|
|
3971
|
+
_apply_thinking_grammar(openai_body)
|
|
3972
|
+
|
|
3913
3973
|
return openai_body
|
|
3914
3974
|
|
|
3915
3975
|
|
|
@@ -64,7 +64,7 @@ logger = logging.getLogger("uap_tool_call")
|
|
|
64
64
|
|
|
65
65
|
# ── Model Profiles ──────────────────────────────────────────────────────────
|
|
66
66
|
|
|
67
|
-
DEFAULT_LLM_SERVER = "http://
|
|
67
|
+
DEFAULT_LLM_SERVER = "http://127.0.0.1:4000"
|
|
68
68
|
|
|
69
69
|
|
|
70
70
|
def _normalize_base_url(url: str) -> str:
|