PyPI - devcopilot - Versions diffs - 0.2.0__py3-none-any.whl - Mend

devcopilot 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (189) hide show

api/__init__.py +17 -0
api/admin_config.py +1303 -0
api/admin_routes.py +287 -0
api/admin_static/admin.css +459 -0
api/admin_static/admin.js +497 -0
api/admin_static/index.html +77 -0
api/admin_urls.py +34 -0
api/app.py +194 -0
api/command_utils.py +164 -0
api/dependencies.py +144 -0
api/detection.py +152 -0
api/gateway_model_ids.py +54 -0
api/model_catalog.py +133 -0
api/model_router.py +125 -0
api/models/__init__.py +45 -0
api/models/anthropic.py +234 -0
api/models/openai_responses.py +28 -0
api/models/responses.py +60 -0
api/optimization_handlers.py +154 -0
api/request_pipeline.py +424 -0
api/routes.py +156 -0
api/runtime.py +334 -0
api/validation_log.py +48 -0
api/web_server_tools.py +22 -0
api/web_tools/__init__.py +17 -0
api/web_tools/constants.py +15 -0
api/web_tools/egress.py +99 -0
api/web_tools/outbound.py +278 -0
api/web_tools/parsers.py +104 -0
api/web_tools/request.py +87 -0
api/web_tools/streaming.py +206 -0
cli/__init__.py +5 -0
cli/claude_env.py +12 -0
cli/entrypoints.py +166 -0
cli/env.example +209 -0
cli/launchers/__init__.py +1 -0
cli/launchers/claude.py +84 -0
cli/launchers/codex.py +204 -0
cli/launchers/codex_model_catalog.py +186 -0
cli/launchers/common.py +93 -0
cli/managed/__init__.py +6 -0
cli/managed/claude.py +215 -0
cli/managed/manager.py +157 -0
cli/managed/session.py +260 -0
cli/process_registry.py +78 -0
config/__init__.py +5 -0
config/constants.py +13 -0
config/logging_config.py +159 -0
config/nim.py +118 -0
config/paths.py +91 -0
config/provider_catalog.py +259 -0
config/provider_ids.py +7 -0
config/settings.py +538 -0
core/__init__.py +1 -0
core/anthropic/__init__.py +46 -0
core/anthropic/content.py +31 -0
core/anthropic/conversion.py +587 -0
core/anthropic/emitted_sse_tracker.py +346 -0
core/anthropic/errors.py +70 -0
core/anthropic/native_messages_request.py +280 -0
core/anthropic/native_sse_block_policy.py +313 -0
core/anthropic/provider_stream_error.py +34 -0
core/anthropic/server_tool_sse.py +14 -0
core/anthropic/sse.py +440 -0
core/anthropic/stream_contracts.py +205 -0
core/anthropic/stream_recovery.py +346 -0
core/anthropic/stream_recovery_session.py +133 -0
core/anthropic/thinking.py +140 -0
core/anthropic/tokens.py +117 -0
core/anthropic/tools.py +212 -0
core/anthropic/utils.py +9 -0
core/openai_responses/__init__.py +5 -0
core/openai_responses/adapter.py +31 -0
core/openai_responses/anthropic_sse.py +59 -0
core/openai_responses/errors.py +22 -0
core/openai_responses/events.py +19 -0
core/openai_responses/ids.py +21 -0
core/openai_responses/input.py +258 -0
core/openai_responses/items.py +37 -0
core/openai_responses/reasoning.py +52 -0
core/openai_responses/stream.py +25 -0
core/openai_responses/stream_state.py +654 -0
core/openai_responses/tools.py +374 -0
core/openai_responses/usage.py +37 -0
core/rate_limit.py +60 -0
core/trace.py +216 -0
devcopilot-0.2.0.dist-info/METADATA +687 -0
devcopilot-0.2.0.dist-info/RECORD +189 -0
devcopilot-0.2.0.dist-info/WHEEL +4 -0
devcopilot-0.2.0.dist-info/entry_points.txt +6 -0
devcopilot-0.2.0.dist-info/licenses/LICENSE +21 -0
messaging/__init__.py +26 -0
messaging/cli_event_constants.py +67 -0
messaging/command_context.py +66 -0
messaging/command_dispatcher.py +37 -0
messaging/commands.py +275 -0
messaging/event_parser.py +181 -0
messaging/limiter.py +300 -0
messaging/models.py +36 -0
messaging/node_event_pipeline.py +127 -0
messaging/node_runner.py +342 -0
messaging/platforms/__init__.py +15 -0
messaging/platforms/base.py +228 -0
messaging/platforms/discord.py +567 -0
messaging/platforms/factory.py +103 -0
messaging/platforms/outbox.py +144 -0
messaging/platforms/telegram.py +688 -0
messaging/platforms/voice_flow.py +295 -0
messaging/rendering/__init__.py +3 -0
messaging/rendering/discord_markdown.py +318 -0
messaging/rendering/markdown_tables.py +49 -0
messaging/rendering/profiles.py +55 -0
messaging/rendering/telegram_markdown.py +327 -0
messaging/safe_diagnostics.py +17 -0
messaging/session.py +334 -0
messaging/transcript.py +581 -0
messaging/transcription.py +164 -0
messaging/trees/__init__.py +15 -0
messaging/trees/data.py +482 -0
messaging/trees/manager.py +433 -0
messaging/trees/processor.py +179 -0
messaging/trees/repository.py +177 -0
messaging/turn_intake.py +235 -0
messaging/ui_updates.py +101 -0
messaging/voice.py +76 -0
messaging/workflow.py +200 -0
providers/__init__.py +31 -0
providers/base.py +152 -0
providers/cerebras/__init__.py +7 -0
providers/cerebras/client.py +31 -0
providers/cerebras/request.py +55 -0
providers/codestral/__init__.py +7 -0
providers/codestral/client.py +34 -0
providers/deepseek/__init__.py +11 -0
providers/deepseek/client.py +51 -0
providers/deepseek/request.py +475 -0
providers/defaults.py +41 -0
providers/error_mapping.py +309 -0
providers/exceptions.py +113 -0
providers/fireworks/__init__.py +5 -0
providers/fireworks/client.py +45 -0
providers/fireworks/request.py +48 -0
providers/gemini/__init__.py +7 -0
providers/gemini/client.py +49 -0
providers/gemini/request.py +199 -0
providers/groq/__init__.py +7 -0
providers/groq/client.py +31 -0
providers/groq/request.py +83 -0
providers/kimi/__init__.py +10 -0
providers/kimi/client.py +53 -0
providers/kimi/request.py +42 -0
providers/llamacpp/__init__.py +3 -0
providers/llamacpp/client.py +16 -0
providers/lmstudio/__init__.py +5 -0
providers/lmstudio/client.py +16 -0
providers/mistral/__init__.py +7 -0
providers/mistral/client.py +31 -0
providers/mistral/request.py +37 -0
providers/model_listing.py +133 -0
providers/nvidia_nim/__init__.py +7 -0
providers/nvidia_nim/client.py +91 -0
providers/nvidia_nim/request.py +430 -0
providers/nvidia_nim/voice.py +95 -0
providers/ollama/__init__.py +7 -0
providers/ollama/client.py +39 -0
providers/open_router/__init__.py +7 -0
providers/open_router/client.py +124 -0
providers/open_router/request.py +42 -0
providers/opencode/__init__.py +11 -0
providers/opencode/client.py +31 -0
providers/opencode/request.py +35 -0
providers/rate_limit.py +300 -0
providers/registry.py +527 -0
providers/transports/__init__.py +1 -0
providers/transports/anthropic_messages/__init__.py +5 -0
providers/transports/anthropic_messages/http.py +118 -0
providers/transports/anthropic_messages/recovery.py +206 -0
providers/transports/anthropic_messages/stream.py +295 -0
providers/transports/anthropic_messages/transport.py +236 -0
providers/transports/openai_chat/__init__.py +5 -0
providers/transports/openai_chat/recovery.py +217 -0
providers/transports/openai_chat/stream.py +384 -0
providers/transports/openai_chat/tool_calls.py +293 -0
providers/transports/openai_chat/transport.py +156 -0
providers/wafer/__init__.py +10 -0
providers/wafer/client.py +50 -0
providers/zai/__init__.py +10 -0
providers/zai/client.py +46 -0
providers/zai/request.py +42 -0

cli/managed/session.py ADDED Viewed

@@ -0,0 +1,260 @@
+"""Managed Claude Code subprocess session."""
+import asyncio
+import os
+from collections.abc import AsyncGenerator
+from loguru import logger
+from cli.process_registry import kill_pid_tree_best_effort, register_pid, unregister_pid
+from core.trace import trace_event
+from .claude import (
+    ManagedClaudeConfig,
+    ManagedClaudeParseState,
+    ManagedClaudeTaskRequest,
+    build_managed_claude_invocation,
+    parse_managed_claude_stdout_line,
+)
+# Cap stderr capture so a runaway child cannot exhaust memory; pipe is still drained.
+_MAX_STDERR_CAPTURE_BYTES = 256 * 1024
+class ManagedClaudeSession:
+    """Runs Claude Code CLI subprocesses for one workspace, one task at a time.
+    Every ``start_task`` call spawns a fresh subprocess. Calls are serialized by
+    an internal lock, and the most recently spawned process is kept on
+    ``self.process`` so ``stop`` can terminate it.
+    """
+    def __init__(
+        self,
+        workspace_path: str,
+        api_url: str,
+        allowed_dirs: list[str] | None = None,
+        plans_directory: str | None = None,
+        claude_bin: str = "claude",
+        auth_token: str = "",
+        *,
+        log_raw_cli_diagnostics: bool = False,
+    ):
+        """Store the launch configuration; no subprocess is started here.
+        Args:
+            workspace_path: Workspace directory; made absolute and normalized.
+            api_url: Stored on the config unchanged.
+            allowed_dirs: Extra directories; each entry is normalized.
+            plans_directory: Stored on the config unchanged.
+            claude_bin: Stored on the config unchanged.
+            auth_token: Stored on the config unchanged.
+            log_raw_cli_diagnostics: When true, raw stderr text and exception
+                messages are logged; otherwise only sizes and type names.
+        """
+        self.config = ManagedClaudeConfig(
+            workspace_path=os.path.normpath(os.path.abspath(workspace_path)),
+            api_url=api_url,
+            allowed_dirs=[os.path.normpath(d) for d in (allowed_dirs or [])],
+            plans_directory=plans_directory,
+            claude_bin=claude_bin,
+            auth_token=auth_token,
+        )
+        # Mirror the config fields as plain attributes.
+        self.workspace = self.config.workspace_path
+        self.api_url = self.config.api_url
+        self.allowed_dirs = self.config.allowed_dirs
+        self.plans_directory = self.config.plans_directory
+        self.claude_bin = self.config.claude_bin
+        self.auth_token = self.config.auth_token
+        self._log_raw_cli_diagnostics = log_raw_cli_diagnostics
+        self.process: asyncio.subprocess.Process | None = None
+        self.current_session_id: str | None = None
+        self._is_busy = False
+        self._cli_lock = asyncio.Lock()
+    @staticmethod
+    async def _drain_stderr_bounded(
+        process: asyncio.subprocess.Process,
+        *,
+        max_bytes: int = _MAX_STDERR_CAPTURE_BYTES,
+    ) -> bytes:
+        """Read stderr concurrently with stdout to avoid subprocess pipe deadlocks.
+        Retains at most ``max_bytes`` for logging; any excess is discarded, but
+        the pipe is read until EOF so a noisy child cannot fill the buffer and
+        block forever.
+        Returns:
+            The retained stderr prefix, or ``b""`` when the process has no
+            stderr pipe.
+        """
+        if not process.stderr:
+            return b""
+        parts: list[bytes] = []
+        remaining = max_bytes
+        while True:
+            chunk = await process.stderr.read(65_536)
+            if not chunk:
+                break
+            if remaining > 0:
+                kept = chunk[:remaining]
+                parts.append(kept)
+                remaining -= len(kept)
+            # Once the cap is reached, keep reading and discarding until EOF.
+        return b"".join(parts)
+    @property
+    def is_busy(self) -> bool:
+        """Check if a task is currently running."""
+        return self._is_busy
+    async def start_task(
+        self, prompt: str, session_id: str | None = None, fork_session: bool = False
+    ) -> AsyncGenerator[dict]:
+        """
+        Start a new task or continue an existing session.
+        Args:
+            prompt: The user's message/prompt
+            session_id: Optional session ID to resume
+            fork_session: Passed to the invocation builder alongside ``session_id``
+        Yields:
+            Event dictionaries parsed from the CLI's stdout, then an ``error``
+            event carrying captured stderr text (if any), and finally an
+            ``exit`` event with the process return code.
+        """
+        async with self._cli_lock:
+            self._is_busy = True
+            # Everything after the busy flag is set runs inside the try so a
+            # failure while building or launching the invocation cannot leave
+            # the session stuck in the busy state.
+            try:
+                invocation = build_managed_claude_invocation(
+                    config=self.config,
+                    request=ManagedClaudeTaskRequest(
+                        prompt=prompt,
+                        session_id=session_id,
+                        fork_session=fork_session,
+                    ),
+                    base_env=os.environ,
+                )
+                trace_event(
+                    stage="claude_cli",
+                    event="claude_cli.process.launch",
+                    source="claude_cli",
+                    **invocation.trace_metadata,
+                )
+                self.process = await asyncio.create_subprocess_exec(
+                    *invocation.argv,
+                    stdout=asyncio.subprocess.PIPE,
+                    stderr=asyncio.subprocess.PIPE,
+                    cwd=invocation.cwd,
+                    env=invocation.env,
+                )
+                if self.process and self.process.pid:
+                    register_pid(self.process.pid)
+                if not self.process or not self.process.stdout:
+                    yield {"type": "exit", "code": 1}
+                    return
+                parse_state = ManagedClaudeParseState(
+                    log_raw_cli_diagnostics=self._log_raw_cli_diagnostics
+                )
+                buffer = bytearray()
+                stderr_task: asyncio.Task[bytes] | None = None
+                if self.process.stderr:
+                    stderr_task = asyncio.create_task(
+                        self._drain_stderr_bounded(self.process)
+                    )
+                try:
+                    while True:
+                        chunk = await self.process.stdout.read(65536)
+                        if not chunk:
+                            # EOF: flush a trailing line that had no newline.
+                            if buffer:
+                                line_str = buffer.decode(
+                                    "utf-8", errors="replace"
+                                ).strip()
+                                if line_str:
+                                    async for event in self._handle_line_gen(
+                                        line_str, parse_state
+                                    ):
+                                        yield event
+                            break
+                        buffer.extend(chunk)
+                        while True:
+                            newline_pos = buffer.find(b"\n")
+                            if newline_pos == -1:
+                                break
+                            line = bytes(buffer[:newline_pos])
+                            # Drop the consumed line in place instead of
+                            # rebuilding the whole buffer for every line.
+                            del buffer[: newline_pos + 1]
+                            line_str = line.decode("utf-8", errors="replace").strip()
+                            if line_str:
+                                async for event in self._handle_line_gen(
+                                    line_str, parse_state
+                                ):
+                                    yield event
+                except BaseException:
+                    # Cancellation, an early close of this generator, or a
+                    # parser failure all mean nobody will read stdout again.
+                    # Stop the CLI so it is not left running in the background
+                    # and so the stderr drain below can reach EOF.
+                    await asyncio.shield(self.stop())
+                    raise
+                finally:
+                    stderr_bytes = b""
+                    if stderr_task is not None:
+                        stderr_bytes = await stderr_task
+                stderr_text = None
+                if stderr_bytes:
+                    stderr_text = stderr_bytes.decode("utf-8", errors="replace").strip()
+                    if stderr_text:
+                        if self._log_raw_cli_diagnostics:
+                            logger.error("Claude CLI stderr: {}", stderr_text)
+                        else:
+                            # Log sizes only so stderr content stays out of the logs.
+                            logger.error(
+                                "Claude CLI stderr: bytes={} text_chars={}",
+                                len(stderr_bytes),
+                                len(stderr_text),
+                            )
+                        logger.info("CLI_SESSION: Yielding error event from stderr")
+                        yield {"type": "error", "error": {"message": stderr_text}}
+                return_code = await self.process.wait()
+                logger.info(
+                    f"Claude CLI exited with code {return_code}, stderr_present={bool(stderr_text)}"
+                )
+                if return_code != 0 and not stderr_text:
+                    logger.warning(
+                        f"CLI_SESSION: Process exited with code {return_code} but no stderr captured"
+                    )
+                yield {
+                    "type": "exit",
+                    "code": return_code,
+                    "stderr": stderr_text,
+                }
+            finally:
+                self._is_busy = False
+                if self.process and self.process.pid:
+                    unregister_pid(self.process.pid)
+    async def _handle_line_gen(
+        self, line_str: str, parse_state: ManagedClaudeParseState
+    ) -> AsyncGenerator[dict]:
+        """Process a single line and yield events.
+        A ``session_info`` event with a string ``session_id`` also updates
+        ``current_session_id`` before the event is yielded.
+        """
+        for event in parse_managed_claude_stdout_line(line_str, parse_state):
+            if isinstance(event, dict) and event.get("type") == "session_info":
+                session_id = event.get("session_id")
+                if isinstance(session_id, str):
+                    self.current_session_id = session_id
+            yield event
+    async def stop(self) -> bool:
+        """Stop the CLI process.
+        Returns:
+            True when a running process was terminated and reaped; False when
+            no process was running or stopping it raised.
+        """
+        if self.process and self.process.returncode is None:
+            try:
+                logger.info(f"Stopping Claude CLI process {self.process.pid}")
+                kill_pid_tree_best_effort(self.process.pid)
+                try:
+                    await asyncio.wait_for(self.process.wait(), timeout=5.0)
+                except TimeoutError:
+                    # The first attempt did not end the process in time; force it.
+                    self.process.kill()
+                    await self.process.wait()
+                if self.process and self.process.pid:
+                    unregister_pid(self.process.pid)
+                return True
+            except Exception as e:
+                if self._log_raw_cli_diagnostics:
+                    logger.error(
+                        "Error stopping process: {}: {}",
+                        type(e).__name__,
+                        e,
+                    )
+                else:
+                    logger.error(
+                        "Error stopping process: exc_type={}",
+                        type(e).__name__,
+                    )
+                return False
+        return False

cli/process_registry.py ADDED Viewed

@@ -0,0 +1,78 @@
+"""Track and clean up spawned CLI subprocesses.
+This is a safety net for cases where the server is interrupted (Ctrl+C) and the
+FastAPI lifespan cleanup doesn't run to completion. We only track processes we
+spawn so we don't accidentally kill unrelated system processes.
+"""
+from __future__ import annotations
+import atexit
+import os
+import signal
+import subprocess
+import threading
+from loguru import logger
+# Guards ``_pids`` and ``_atexit_registered``.
+_lock = threading.Lock()
+# PIDs added by ``register_pid`` and not yet removed.
+_pids: set[int] = set()
+# Becomes True once ``kill_all_best_effort`` has been registered with ``atexit``.
+_atexit_registered = False
+def ensure_atexit_registered() -> None:
+    """Register ``kill_all_best_effort`` with ``atexit`` the first time this runs."""
+    global _atexit_registered
+    with _lock:
+        if not _atexit_registered:
+            atexit.register(kill_all_best_effort)
+            _atexit_registered = True
+def register_pid(pid: int) -> None:
+    """Start tracking ``pid`` for exit-time cleanup; falsy pids are ignored."""
+    if pid:
+        ensure_atexit_registered()
+        with _lock:
+            _pids.add(int(pid))
+def unregister_pid(pid: int) -> None:
+    """Stop tracking ``pid``; falsy or unknown pids are ignored."""
+    if pid:
+        with _lock:
+            _pids.discard(int(pid))
+def kill_pid_tree_best_effort(pid: int) -> None:
+    """Terminate ``pid`` without raising; falsy pids are ignored.
+    On Windows this runs ``taskkill /T /F``, which also ends child processes.
+    On other platforms only ``pid`` itself is sent ``SIGTERM``.
+    Failures are logged at debug level and swallowed.
+    """
+    if not pid:
+        return
+    if os.name == "nt":
+        try:
+            # /T kills child processes, /F forces termination.
+            subprocess.run(
+                ["taskkill", "/PID", str(pid), "/T", "/F"],
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+                check=False,
+            )
+        except Exception as e:
+            # loguru formats with ``{}`` placeholders; ``%s`` would be logged verbatim.
+            logger.debug("process_registry: taskkill failed pid={}: {}", pid, e)
+        return
+    # Best-effort fallback for non-Windows.
+    try:
+        os.kill(pid, signal.SIGTERM)
+    except Exception as e:
+        logger.debug("process_registry: terminate failed pid={}: {}", pid, e)
+def kill_all_best_effort() -> None:
+    """Forget every tracked pid, then try to kill each one (best-effort)."""
+    with _lock:
+        # Snapshot and clear under the lock; the kills run outside it.
+        snapshot = list(_pids)
+        _pids.clear()
+    for tracked_pid in snapshot:
+        kill_pid_tree_best_effort(tracked_pid)

config/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Configuration management."""
+from .settings import Settings, get_settings
+__all__ = ["Settings", "get_settings"]

config/constants.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""Defaults shared between the config models and the provider adapters."""
+# Connect timeout for HTTP clients, in seconds. Keep aligned with README.md and .env.example.
+HTTP_CONNECT_TIMEOUT_DEFAULT = 10.0
+# Output-token default for the Anthropic Messages API when the client sends no max_tokens.
+ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS = 80 * 1024
+# Byte cap on the body read from a non-200 native messages response when verbose error logging is on.
+NATIVE_MESSAGES_ERROR_BODY_LOG_CAP_BYTES = 4 * 1024
+# Byte cap on upstream error text shown to users for copy/paste diagnostics.
+PROVIDER_ERROR_BODY_DISPLAY_CAP_BYTES = 16 * 1024

config/logging_config.py ADDED Viewed

@@ -0,0 +1,159 @@
+"""Loguru-based structured logging configuration.
+Structured logs are written as JSON lines to a configurable path (default
+``logs/server.log``). Stdlib logging is intercepted and funneled to loguru.
+Context vars (request_id, node_id, chat_id) from contextualize() are
+included at top level for easy grep/filter.
+"""
+import json
+import logging
+import re
+import threading
+from pathlib import Path
+from loguru import logger
+# Set by ``configure_logging``; lets repeated calls return early unless forced.
+_configured = False
+# Loguru ``logger.bind()`` key used by structured TRACE payloads; ``core/trace.py``
+# uses the identical string constant ``TRACE_PAYLOAD_BINDING``.
+_TRACE_PAYLOAD_BINDING = "trace_payload"
+# Context keys we promote to top-level JSON for traceability / grep
+_CONTEXT_KEYS = (
+    "request_id",
+    "node_id",
+    "chat_id",
+    "claude_session_id",
+    "http_method",
+    "http_path",
+)
+# Telegram Bot API URLs embed the bot token in the path: .../bot<id>:<secret>/...
+_TELEGRAM_BOT_RE = re.compile(
+    r"(https?://api\.telegram\.org/)bot([0-9]+:[A-Za-z0-9_-]+)(/?)",
+    re.IGNORECASE,
+)
+# "Authorization: Bearer <token>" as it appears in HTTP client / proxy debug lines.
+_AUTH_BEARER_RE = re.compile(
+    r"(\bAuthorization\s*:\s*Bearer\s+)([^\s'\"]+)",
+    re.IGNORECASE,
+)
+def _redact_sensitive_substrings(message: str) -> str:
+    """Mask Telegram bot tokens and Bearer credentials in ``message``.
+    The surrounding text (URL prefix, header name, trailing slash) is kept;
+    only the secret itself is replaced with ``<redacted>``.
+    """
+    without_bot_token = _TELEGRAM_BOT_RE.sub(r"\1bot<redacted>\3", message)
+    without_bearer = _AUTH_BEARER_RE.sub(r"\1<redacted>", without_bot_token)
+    return without_bearer
+def _serialize_with_context(record) -> str:
+    """Build the JSON log line for ``record`` and return the format template.
+    The JSON text is stored on the record under ``_json``; the returned
+    template ``"{_json}\\n"`` refers to that entry. Context keys and any bound
+    trace payload are flattened into the top level of the JSON object.
+    """
+    extra = record.get("extra", {})
+    payload = {
+        "time": str(record["time"]),
+        "level": record["level"].name,
+        "message": _redact_sensitive_substrings(str(record["message"])),
+        "module": record["name"],
+        "function": record["function"],
+        "line": record["line"],
+    }
+    # Promote the known context keys that are present and not None.
+    for context_key in _CONTEXT_KEYS:
+        context_value = extra.get(context_key)
+        if context_value is not None:
+            payload[context_key] = context_value
+    trace_fields = extra.get(_TRACE_PAYLOAD_BINDING)
+    if isinstance(trace_fields, dict):
+        # Trace fields never overwrite a key that is already present.
+        for field_name, field_value in trace_fields.items():
+            payload.setdefault(field_name, field_value)
+        payload["trace"] = True
+    record["_json"] = json.dumps(payload, default=str)
+    return "{_json}\n"
+class InterceptHandler(logging.Handler):
+    """Redirect stdlib logging to loguru.
+    A thread-local ``active`` flag makes ``emit`` a no-op when it is re-entered
+    on the same thread, i.e. when a stdlib record is produced while another
+    one is still being forwarded.
+    """
+    def __init__(self) -> None:
+        super().__init__()
+        # Per-thread re-entrancy marker read and written by ``emit``.
+        self._local = threading.local()
+    def emit(self, record: logging.LogRecord) -> None:
+        """Forward ``record`` to loguru with its level, message and exception info."""
+        if getattr(self._local, "active", False):
+            # Avoid deadlock when nested stdlib records fire during a loguru emit.
+            return
+        self._local.active = True
+        try:
+            # Use loguru's level name when one matches the stdlib level name;
+            # otherwise fall back to the numeric stdlib level.
+            try:
+                level = logger.level(record.levelname).name
+            except ValueError:
+                level = record.levelno
+            # Walk outward past frames whose code lives in the stdlib ``logging``
+            # module, counting each hop; the count is handed to ``logger.opt`` as
+            # ``depth``. Both calls must stay directly inside ``emit`` because the
+            # count is relative to this frame.
+            frame, depth = logging.currentframe(), 2
+            while frame is not None and frame.f_code.co_filename == logging.__file__:
+                frame = frame.f_back
+                depth += 1
+            logger.opt(depth=depth, exception=record.exc_info).log(
+                level, record.getMessage()
+            )
+        finally:
+            # Always clear the marker so later records on this thread are forwarded.
+            self._local.active = False
+def configure_logging(
+    log_file: str | Path, *, force: bool = False, verbose_third_party: bool = False
+) -> None:
+    """Configure loguru with JSON output to log_file and intercept stdlib logging.
+    Idempotent: skips if already configured (e.g. hot reload).
+    Use force=True to reconfigure (e.g. in tests with a different log path).
+    When ``verbose_third_party`` is false, noisy HTTP and Telegram loggers are capped
+    at WARNING unless explicitly configured otherwise.
+    The module is marked as configured only after every step has succeeded, so
+    a failure part-way through (for example while creating the log directory)
+    can be retried by calling this function again.
+    Args:
+        log_file: Destination of the JSON-lines log; parent directories are
+            created and an existing file is truncated.
+        force: Reconfigure even when a previous call already succeeded.
+        verbose_third_party: Leave the listed third-party loggers at NOTSET
+            instead of capping them at WARNING.
+    """
+    global _configured
+    if _configured and not force:
+        return
+    # Remove default loguru handler (writes to stderr)
+    logger.remove()
+    log_path = Path(log_file)
+    log_path.parent.mkdir(parents=True, exist_ok=True)
+    # Truncate log file on fresh start for clean debugging
+    log_path.write_text("")
+    # Add file sink: JSON lines, DEBUG level, context vars at top level
+    logger.add(
+        log_file,
+        level="DEBUG",
+        format=_serialize_with_context,
+        encoding="utf-8",
+        mode="a",
+        rotation="50 MB",
+        enqueue=True,
+    )
+    # Intercept stdlib logging: route all root logger output to loguru
+    logging.root.handlers = [InterceptHandler()]
+    logging.root.setLevel(logging.DEBUG)
+    third_party_level = logging.NOTSET if verbose_third_party else logging.WARNING
+    for name in (
+        "httpx",
+        "httpcore",
+        "httpcore.http11",
+        "httpcore.connection",
+        "telegram",
+        "telegram.ext",
+    ):
+        logging.getLogger(name).setLevel(third_party_level)
+    # Set last: an exception above must not leave the module flagged as
+    # configured while no sink is installed.
+    _configured = True

config/nim.py ADDED Viewed

@@ -0,0 +1,118 @@
+"""NVIDIA NIM settings (fixed values, no env config)."""
+from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator
+from config.constants import ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS
+class NimSettings(BaseModel):
+    """Fixed NVIDIA NIM settings (not configurable via env).
+    Every "before" validator treats ``None`` and the empty string as "unset"
+    and substitutes the field's default (or ``None`` for optional fields).
+    Values that cannot be coerced raise ``ValueError`` so pydantic reports
+    them as validation errors. Unknown fields are rejected.
+    """
+    temperature: float = Field(
+        1.0, ge=0.0, le=2.0, description="Sampling temperature, must be >=0 and <=2."
+    )
+    top_p: float = Field(
+        1.0, ge=0.0, le=1.0, description="Nucleus sampling probability. [0,1]"
+    )
+    # -1 is the "unset" value substituted by ``validate_top_k``.
+    top_k: int = -1
+    max_tokens: int = Field(
+        ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS,
+        ge=1,
+        description="Maximum number of tokens in output.",
+    )
+    presence_penalty: float = Field(0.0, ge=-2.0, le=2.0)
+    frequency_penalty: float = Field(0.0, ge=-2.0, le=2.0)
+    min_p: float = Field(
+        0.0, ge=0.0, le=1.0, description="Minimum probability threshold [0,1]."
+    )
+    repetition_penalty: float = Field(
+        1.0, ge=0.0, description="Penalty for repeated tokens. Must be >=0."
+    )
+    seed: int | None = None
+    stop: str | None = None
+    parallel_tool_calls: bool = True
+    ignore_eos: bool = False
+    min_tokens: int = Field(0, ge=0, description="Minimum tokens in the response.")
+    chat_template: str | None = None
+    request_id: str | None = None
+    model_config = ConfigDict(extra="forbid")
+    @field_validator("top_k", mode="before")
+    @classmethod
+    def validate_top_k(cls, v, info: ValidationInfo):
+        """Coerce ``top_k`` to an int; empty input becomes -1, values below -1 are rejected."""
+        if v is None or v == "":
+            return -1
+        # Wrap the coercion like the sibling validators do: a bare TypeError
+        # (e.g. from a list) would otherwise escape instead of becoming a
+        # validation error.
+        try:
+            int_v = int(v)
+        except (TypeError, ValueError) as err:
+            raise ValueError(
+                f"{info.field_name} must be an int. Got {type(v).__name__}."
+            ) from err
+        if int_v < -1:
+            raise ValueError(f"{info.field_name} must be -1 or >= 0")
+        return int_v
+    @field_validator(
+        "temperature",
+        "top_p",
+        "min_p",
+        "presence_penalty",
+        "frequency_penalty",
+        "repetition_penalty",
+        mode="before",
+    )
+    @classmethod
+    def validate_float_fields(cls, v, info: ValidationInfo):
+        """Coerce float settings; empty input falls back to the field's default."""
+        # Mirrors the defaults declared on the fields above.
+        field_defaults = {
+            "temperature": 1.0,
+            "top_p": 1.0,
+            "min_p": 0.0,
+            "presence_penalty": 0.0,
+            "frequency_penalty": 0.0,
+            "repetition_penalty": 1.0,
+        }
+        if v is None or v == "":
+            key = info.field_name or "temperature"
+            return field_defaults.get(key, 1.0)
+        try:
+            val = float(v)
+        except (TypeError, ValueError) as err:
+            raise ValueError(
+                f"{info.field_name} must be a float. Got {type(v).__name__}."
+            ) from err
+        return val
+    @field_validator("max_tokens", "min_tokens", mode="before")
+    @classmethod
+    def validate_int_fields(cls, v, info: ValidationInfo):
+        """Coerce token-count settings; empty input falls back to the field's default."""
+        # Mirrors the defaults declared on the fields above.
+        field_defaults = {
+            "max_tokens": ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS,
+            "min_tokens": 0,
+        }
+        if v is None or v == "":
+            key = info.field_name or "max_tokens"
+            return field_defaults.get(key, ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS)
+        try:
+            val = int(v)
+        except (TypeError, ValueError) as err:
+            raise ValueError(
+                f"{info.field_name} must be an int. Got {type(v).__name__}."
+            ) from err
+        return val
+    @field_validator("seed", mode="before")
+    @classmethod
+    def parse_optional_int(cls, v, info: ValidationInfo):
+        """Coerce ``seed`` to an int; empty input becomes ``None``."""
+        if v == "" or v is None:
+            return None
+        try:
+            return int(v)
+        except (TypeError, ValueError) as err:
+            raise ValueError(
+                f"{info.field_name} must be an int or empty/None."
+            ) from err
+    @field_validator("stop", "chat_template", "request_id", mode="before")
+    @classmethod
+    def parse_optional_str(cls, v, info: ValidationInfo):
+        """Map the empty string to ``None`` and stringify non-string values."""
+        if v == "":
+            return None
+        if v is not None and not isinstance(v, str):
+            return str(v)
+        return v