@miller-tech/uap 1.34.0 → 1.36.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -443,6 +443,23 @@ PROXY_TOOL_CALL_GRAMMAR_PATH = os.path.abspath(
443
443
  os.path.join(os.path.dirname(__file__), "..", "config", "tool-call.gbnf"),
444
444
  )
445
445
  )
446
# Structured thinking grammar — forces a compact <think> header on non-tool
# reasoning turns so downstream verifiers can parse the model's framing.
# Default off (opt-in) because it changes output shape.
#
# The raw value is stripped and lower-cased before comparison, and an empty
# or whitespace-only value counts as "off": a blank `PROXY_THINKING_GRAMMAR=`
# entry (or a padded " off ") must not silently opt in.  Any value that is
# not in the set below still enables the feature.
PROXY_THINKING_GRAMMAR = os.environ.get(
    "PROXY_THINKING_GRAMMAR", "off"
).strip().lower() not in {
    "",
    "0",
    "false",
    "off",
    "no",
}
# Absolute path of the grammar file.  Defaults to ../config/thinking.gbnf
# relative to this module's directory; PROXY_THINKING_GRAMMAR_PATH overrides it.
PROXY_THINKING_GRAMMAR_PATH = os.path.abspath(
    os.environ.get(
        "PROXY_THINKING_GRAMMAR_PATH",
        os.path.join(os.path.dirname(__file__), "..", "config", "thinking.gbnf"),
    )
)
446
463
  PROXY_MODEL_PROFILE_HEADER = os.environ.get(
447
464
  "PROXY_MODEL_PROFILE_HEADER", "x-uap-model-profile"
448
465
  )
@@ -534,6 +551,41 @@ def _load_tool_call_grammar(path: str) -> str:
534
551
  TOOL_CALL_GBNF = _load_tool_call_grammar(PROXY_TOOL_CALL_GRAMMAR_PATH)
535
552
  TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE = True
536
553
 
554
+
555
def _load_thinking_grammar(path: str) -> str:
    """Read the structured-thinking GBNF grammar from *path*.

    Args:
        path: Filesystem path of the grammar file, expected to be UTF-8 text.

    Returns:
        The file contents with leading/trailing whitespace stripped, or
        ``""`` when ``PROXY_THINKING_GRAMMAR`` is off or the file cannot be
        read or decoded.  A bad file never raises: the failure is logged as a
        warning and the grammar is treated as absent.
    """
    if not PROXY_THINKING_GRAMMAR:
        return ""

    try:
        with open(path, "r", encoding="utf-8") as fh:
            return fh.read().strip()
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError derives from ValueError, not OSError, so it has
        # to be listed explicitly; otherwise a grammar file that is not valid
        # UTF-8 would raise out of this loader instead of disabling the
        # feature like every other read failure.
        logger.warning(
            "Thinking grammar disabled: failed to read %s (%s)",
            path,
            exc,
        )
        return ""
569
+
570
+
571
+ THINKING_GBNF = _load_thinking_grammar(PROXY_THINKING_GRAMMAR_PATH)
572
+
573
+
574
def _apply_thinking_grammar(request_body: dict) -> None:
    """Attach the structured-thinking GBNF grammar to a non-tool request.

    Mutates *request_body* in place by setting its ``"grammar"`` key to
    ``THINKING_GBNF``.  The request is left untouched unless all of the
    following hold: ``PROXY_THINKING_GRAMMAR`` is on, a non-empty grammar was
    loaded, the request carries no truthy ``"tools"`` entry, and no truthy
    ``"grammar"`` entry is already present (so a grammar set earlier, such as
    the tool-call grammar on tool turns, takes precedence).
    """
    if PROXY_THINKING_GRAMMAR and THINKING_GBNF:
        # Either a tool list or a pre-existing grammar means this turn is
        # already constrained; do not override it.
        already_constrained = request_body.get("tools") or request_body.get(
            "grammar"
        )
        if not already_constrained:
            request_body["grammar"] = THINKING_GBNF
588
+
537
589
  def _resolve_passthrough_models() -> list[str]:
538
590
  raw = ANTHROPIC_PASSTHROUGH_MODELS.strip()
539
591
  if not raw:
@@ -2079,6 +2131,12 @@ async def lifespan(app: FastAPI):
2079
2131
  TOOL_CALL_GRAMMAR_TOOLS_COMPATIBLE,
2080
2132
  PROXY_TOOL_CALL_GRAMMAR_PATH,
2081
2133
  )
2134
+ logger.info(
2135
+ "Thinking grammar: enabled=%s loaded=%s path=%s",
2136
+ PROXY_THINKING_GRAMMAR,
2137
+ bool(THINKING_GBNF),
2138
+ PROXY_THINKING_GRAMMAR_PATH,
2139
+ )
2082
2140
  logger.info(
2083
2141
  "Timeouts: read=%ds generation=%ds slot_hang=%ds",
2084
2142
  int(PROXY_READ_TIMEOUT),
@@ -3910,6 +3968,8 @@ def build_openai_request(
3910
3968
  # pre-narrowing toolset so it can restore a dropped write tool.
3911
3969
  _maybe_inject_recon_convergence(openai_body, monitor, full_openai_tools)
3912
3970
 
3971
+ _apply_thinking_grammar(openai_body)
3972
+
3913
3973
  return openai_body
3914
3974
 
3915
3975
 
@@ -46,7 +46,7 @@ logging.basicConfig(
46
46
  )
47
47
  logger = logging.getLogger("qwen35_tool_call")
48
48
 
49
- DEFAULT_LLM_SERVER = "http://192.168.1.165:4000"
49
+ DEFAULT_LLM_SERVER = "http://127.0.0.1:4000"
50
50
 
51
51
 
52
52
  def _normalize_base_url(url: str) -> str:
@@ -64,7 +64,7 @@ logger = logging.getLogger("uap_tool_call")
64
64
 
65
65
  # ── Model Profiles ──────────────────────────────────────────────────────────
66
66
 
67
- DEFAULT_LLM_SERVER = "http://192.168.1.165:4000"
67
+ DEFAULT_LLM_SERVER = "http://127.0.0.1:4000"
68
68
 
69
69
 
70
70
  def _normalize_base_url(url: str) -> str: