PyPI - langchain-tool-args-validation-middleware - Versions diffs - 0.1.0__py3-none-any.whl - Mend

langchain-tool-args-validation-middleware 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

langchain_tool_args_validation_middleware/__init__.py ADDED Viewed

@@ -0,0 +1,24 @@
+"""Validate LLM tool-call arguments against each tool's schema before execution."""
+from ._strip import DEFAULT_PLACEHOLDER_STRINGS, strip_empty
+from ._validation import ValidationIssue
+from .extras import detect_langchain_internal_ids
+from .middleware import (
+    ExtraValidator,
+    OnFailure,
+    ToolArgsValidationError,
+    ToolArgsValidationMiddleware,
+)
+# Public names re-exported at package level; entries are kept in ASCII sort
+# order (upper-case names first).
+__all__ = [
+    "DEFAULT_PLACEHOLDER_STRINGS",
+    "ExtraValidator",
+    "OnFailure",
+    "ToolArgsValidationError",
+    "ToolArgsValidationMiddleware",
+    "ValidationIssue",
+    "detect_langchain_internal_ids",
+    "strip_empty",
+]
+# Package version string.
+__version__ = "0.1.0"

langchain_tool_args_validation_middleware/_strip.py ADDED Viewed

@@ -0,0 +1,82 @@
+"""Recursive stripping of "empty" values from LLM-generated tool arguments.
+LLMs (Gemini especially) routinely emit explicit ``null`` or empty containers
+for optional fields instead of omitting them. Stripping these before validation
+avoids unnecessary retries: an optional field simply becomes absent, and a
+required field surfaces a clear ``'<field>' is a required property`` error.
+Design note — write-back contract
+----------------------------------
+When stripping is enabled the *cleaned* arguments replace the originals in the
+tool call, so the cleaned version is what both validation **and tool execution**
+see. This keeps "what we validated" and "what runs" identical (no soundness
+gap), at the cost of mutating the model's output. That trade-off is the whole
+point of stripping, but it means stripping a value that is *semantically
+meaningful* (e.g. ``tags: []`` meaning "clear all tags", or ``null`` meaning
+"explicitly unset") changes behaviour. Container stripping (``None``/``{}``/
+``[]``) is on by default; the far riskier string-placeholder stripping
+(``"none"``, ``"N/A"``, ...) is **opt-in only**, because tokens like ``"NA"``
+are legitimate values (Namibia's ISO code, "North America", ...).
+"""
+from __future__ import annotations
+from typing import Any
+# A conservative, opt-in default set of placeholder strings. Deliberately
+# excludes ambiguous real-world tokens like "na"/"nil". Callers may pass their
+# own set instead. Only used when string stripping is explicitly enabled.
+# Entries must be lower-case with no surrounding whitespace: candidates are
+# compared as ``value.strip().lower()``, so any other spelling never matches.
+DEFAULT_PLACEHOLDER_STRINGS: frozenset[str] = frozenset(
+    {"none", "null", "undefined", '""', "''"}
+)
+def strip_empty(
+    value: Any,
+    *,
+    placeholder_strings: frozenset[str] | None = None,
+) -> Any:
+    """Return a copy of *value* with "empty" entries recursively removed.
+    Parameters
+    ----------
+    value:
+        The value to clean (typically a tool call's ``args`` dict, but works on
+        any nested dict/list structure).
+    placeholder_strings:
+        If provided, string values whose stripped/lower-cased form is in this
+        set are also removed. ``None`` (the default) disables string stripping
+        entirely — only ``None``/``{}``/``[]`` are removed.
+    Returns
+    -------
+    A new dict (for dict input) or a new list (for list input) without empty
+    entries; any other value is returned as-is. The top-level container is
+    always returned, even when it ends up empty.
+    Notes
+    -----
+    Returns a new structure; it never mutates *value* in place. The caller
+    decides whether to write the result back onto the tool call.
+    Children are cleaned *before* they are tested for emptiness, so a nested
+    container that only held empty entries (``{"filter": {"name": None}}``) is
+    dropped as well. This makes the function idempotent:
+    ``strip_empty(strip_empty(x)) == strip_empty(x)``.
+    """
+    if isinstance(value, dict):
+        cleaned: dict[Any, Any] = {}
+        for key, val in value.items():
+            stripped = strip_empty(val, placeholder_strings=placeholder_strings)
+            # Test the *cleaned* child so containers emptied by stripping go too.
+            if not _is_empty(stripped, placeholder_strings):
+                cleaned[key] = stripped
+        return cleaned
+    if isinstance(value, list):
+        stripped_items = (
+            strip_empty(item, placeholder_strings=placeholder_strings)
+            for item in value
+        )
+        return [
+            item
+            for item in stripped_items
+            if not _is_empty(item, placeholder_strings)
+        ]
+    return value
+def _is_empty(value: Any, placeholder_strings: frozenset[str] | None) -> bool:
+    """Tell whether *value* counts as "empty" and should be stripped.
+    ``None``, ``{}`` and ``[]`` are always empty. A string is empty only when
+    *placeholder_strings* is given and the trimmed, lower-cased string is one
+    of its members.
+    """
+    if value is None or value == {} or value == []:
+        return True
+    # String placeholders are opt-in: without a set, no string is ever empty.
+    if placeholder_strings is None or not isinstance(value, str):
+        return False
+    return value.strip().lower() in placeholder_strings

langchain_tool_args_validation_middleware/_validation.py ADDED Viewed

@@ -0,0 +1,140 @@
+"""Schema resolution and validation, decoupled from any single validator lib.
+Both validation backends (Pydantic and JSON Schema) are normalised into a
+neutral :class:`ValidationIssue` so the rest of the middleware — error
+formatting, retry — works uniformly and never imports ``jsonschema`` unless a
+dict-schema tool is actually present.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any, Protocol
+from langchain_core.tools import BaseTool
+from pydantic import BaseModel
+@dataclass(frozen=True)
+class ValidationIssue:
+    """One validation problem, independent of the backend that produced it."""
+    # Location of the offending value as a sequence of keys/indices.
+    path: list[Any]
+    # Human-readable description of what is wrong at that location.
+    message: str
+    def render(self) -> str:
+        """Format the issue as a single bullet line: ``  • [a → b] message``.
+        An empty location is shown as ``(root)``.
+        """
+        segments = [str(part) for part in self.path]
+        location = " → ".join(segments) or "(root)"
+        return f"  • [{location}] {self.message}"
+class _JsonSchemaValidator(Protocol):
+    """Structural type for a JSON Schema validator instance.
+    Anything exposing ``iter_errors(instance)`` qualifies, so this module needs
+    no import of ``jsonschema`` for typing purposes.
+    """
+    def iter_errors(self, instance: Any) -> Any:  # pragma: no cover - structural
+        # The result is iterated by ``_validate_json_schema``, which reads
+        # ``absolute_path`` and ``message`` from each yielded error.
+        ...
+class ToolValidator:
+    """Argument validator for a single tool.
+    Backed by a Pydantic model or by a JSON Schema validator instance. When
+    both are supplied the Pydantic model wins; when neither is supplied every
+    argument dict is accepted.
+    """
+    __slots__ = ("name", "_pydantic_model", "_json_validator")
+    def __init__(
+        self,
+        name: str,
+        *,
+        pydantic_model: type[BaseModel] | None = None,
+        json_validator: _JsonSchemaValidator | None = None,
+    ) -> None:
+        """Store the tool *name* and whichever validation backend was given."""
+        self.name = name
+        self._pydantic_model = pydantic_model
+        self._json_validator = json_validator
+    def validate(self, args: dict[str, Any]) -> list[ValidationIssue]:
+        """Return the issues found in *args* (an empty list means valid)."""
+        model = self._pydantic_model
+        if model is not None:
+            return _validate_pydantic(model, args)
+        checker = self._json_validator
+        if checker is None:
+            # No backend configured: nothing to check against.
+            return []
+        return _validate_json_schema(checker, args)
+def resolve_validators(
+    tools: list[BaseTool],
+    *,
+    json_schema_validator_class: type | None,
+) -> dict[str, ToolValidator]:
+    """Map each tool name to a :class:`ToolValidator` built from its schema.
+    A ``dict`` ``args_schema`` yields a JSON-Schema-backed validator; a Pydantic
+    ``BaseModel`` subclass yields a Pydantic-backed one. Tools with any other
+    (or no) ``args_schema`` get no entry and therefore pass through
+    unvalidated. When *json_schema_validator_class* is ``None`` the default
+    class is looked up lazily, the first time a dict schema is encountered.
+    """
+    json_cls = json_schema_validator_class
+    resolved: dict[str, ToolValidator] = {}
+    for tool in tools:
+        schema = getattr(tool, "args_schema", None)
+        if isinstance(schema, dict):
+            if json_cls is None:
+                # Deferred so ``jsonschema`` is only needed for dict schemas.
+                json_cls = _default_json_validator_class()
+            resolved[tool.name] = ToolValidator(
+                tool.name, json_validator=json_cls(schema)
+            )
+            continue
+        if isinstance(schema, type) and issubclass(schema, BaseModel):
+            resolved[tool.name] = ToolValidator(tool.name, pydantic_model=schema)
+        # Any other schema shape: no validator, the tool passes through.
+    return resolved
+def _default_json_validator_class() -> type:
+    """Import and return ``jsonschema.Draft7Validator``.
+    Raises
+    ------
+    ImportError
+        If ``jsonschema`` is not installed; the message explains how to install
+        it or how to supply a custom validator class instead.
+    """
+    try:
+        from jsonschema import Draft7Validator
+    except ImportError as exc:  # pragma: no cover - import guard
+        raise ImportError(
+            "A tool with a JSON Schema (dict) args_schema was provided, but "
+            "'jsonschema' is not installed. Install it with "
+            "`pip install langchain-tool-args-validation-middleware[jsonschema]`, or pass a "
+            "custom `json_schema_validator_class`."
+        ) from exc
+    else:
+        return Draft7Validator  # type: ignore[no-any-return]
+def _validate_pydantic(
+    model: type[BaseModel], args: dict[str, Any]
+) -> list[ValidationIssue]:
+    """Validate *args* with ``model.model_validate`` and normalise failures.
+    Returns an empty list on success, otherwise one :class:`ValidationIssue`
+    per entry of ``ValidationError.errors()`` (using its ``loc`` and ``msg``).
+    """
+    # Import locally so a missing/v1 pydantic surfaces at call time, not import.
+    from pydantic import ValidationError
+    try:
+        model.model_validate(args)
+    except ValidationError as failure:
+        return [
+            ValidationIssue(path=list(item["loc"]), message=str(item["msg"]))
+            for item in failure.errors()
+        ]
+    return []
+def _validate_json_schema(
+    validator: _JsonSchemaValidator, args: dict[str, Any]
+) -> list[ValidationIssue]:
+    """Collect every error reported by ``validator.iter_errors(args)``.
+    Each error is normalised into a :class:`ValidationIssue` from its
+    ``absolute_path`` and ``message`` attributes.
+    """
+    return [
+        ValidationIssue(path=list(err.absolute_path), message=err.message)
+        for err in validator.iter_errors(args)
+    ]
+def format_issues(tool_name: str, issues: list[ValidationIssue]) -> str:
+    """Render *issues* as one newline-joined, LLM-friendly error message.
+    Layout: a header naming the tool, one rendered line per issue, then a
+    hint (preceded by a blank line) about omitting unused optional fields.
+    """
+    header = (
+        f"Tool '{tool_name}' argument validation failed. "
+        "Fix the following errors and retry:"
+    )
+    hint = (
+        "\nHint: if a field is optional and not needed, omit it entirely from "
+        "the arguments rather than setting it to null or an empty value."
+    )
+    return "\n".join([header, *(issue.render() for issue in issues), hint])

langchain_tool_args_validation_middleware/extras.py ADDED Viewed

@@ -0,0 +1,44 @@
+"""Optional, pluggable extra validators for :data:`ExtraValidator`.
+These are *not* part of the core middleware behaviour — they encode
+domain-specific heuristics that some users want and others don't. Opt in by
+passing them via ``extra_validators=[...]``.
+"""
+from __future__ import annotations
+import re
+from typing import Any
+# LangChain internal message IDs (``lc_<uuid4>``). LLMs sometimes lift these out
+# of a ToolMessage envelope and pass them as real data identifiers.
+_LANGCHAIN_ID_RE = re.compile(
+    r"^lc_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
+    re.IGNORECASE,
+)
+def detect_langchain_internal_ids(name: str, args: dict[str, Any]) -> list[str]:
+    """Flag any arg value that looks like a leaked ``lc_<uuid>`` internal ID.
+    Use via ``extra_validators=[detect_langchain_internal_ids]``.
+    String values are checked wherever they occur: at the top level, inside
+    lists, and inside nested dicts/lists at any depth. For a value nested in a
+    dict, the reported argument name is the dotted key path (``filter.id``).
+    Parameters
+    ----------
+    name:
+        Tool name, used only in the error text.
+    args:
+        The tool call's argument dict.
+    Returns
+    -------
+    One error string per offending value, in traversal order; an empty list
+    when nothing looks like an internal ID.
+    """
+    errors: list[str] = []
+    def walk(key: str, value: Any, *, in_list: bool) -> None:
+        if isinstance(value, str):
+            if _LANGCHAIN_ID_RE.match(value):
+                where = " in a list" if in_list else ""
+                errors.append(
+                    f"Tool '{name}' argument '{key}' contains a LangChain internal "
+                    f"ID ('{value}'){where}. This is NOT a valid data identifier — "
+                    "use only real resource IDs from the API response data, not IDs "
+                    "from the tool-call metadata envelope."
+                )
+        elif isinstance(value, dict):
+            # Descend into nested objects, extending the reported key path.
+            for sub_key, sub_value in value.items():
+                walk(f"{key}.{sub_key}", sub_value, in_list=in_list)
+        elif isinstance(value, list):
+            for item in value:
+                walk(key, item, in_list=True)
+    for key, value in args.items():
+        walk(key, value, in_list=False)
+    return errors

langchain_tool_args_validation_middleware/middleware.py ADDED Viewed

@@ -0,0 +1,318 @@
+"""``ToolArgsValidationMiddleware`` — validate LLM tool-call args before execution.
+The middleware wraps the model invocation (``wrap_model_call`` /
+``awrap_model_call``). After each model response it validates every tool call's
+arguments against the tool's schema. On failure it appends error
+``ToolMessage``\\s and re-invokes the model so it can self-correct. The retry
+loop runs entirely inside the model node, so only the final ``AIMessage`` enters
+the graph state — and any human-in-the-loop step that runs *after* the model
+node never sees invalid arguments.
+"""
+from __future__ import annotations
+import logging
+from collections.abc import Awaitable, Callable, Generator
+from typing import Any, Literal, cast
+from langchain.agents.middleware import AgentMiddleware
+from langchain.agents.middleware.types import ModelRequest, ModelResponse
+from langchain_core.messages import AIMessage, AnyMessage, ToolMessage
+from langchain_core.tools import BaseTool
+from ._strip import DEFAULT_PLACEHOLDER_STRINGS, strip_empty
+from ._validation import (
+    ToolValidator,
+    format_issues,
+    resolve_validators,
+)
+logger = logging.getLogger(__name__)
+# A user-supplied extra check: given (tool_name, args) return a list of error
+# strings (empty = no problem). Lets callers plug in domain rules (e.g. catching
+# leaked internal IDs) without bloating the core.
+ExtraValidator = Callable[[str, "dict[str, Any]"], "list[str]"]
+OnFailure = Literal["pass", "raise"]
+class ToolArgsValidationError(RuntimeError):
+    """Raised when validation retries are exhausted and ``on_failure='raise'``.
+    The message states how many validation retries were attempted.
+    """
+_BATCH_SIBLING_NOTICE = (
+    "This tool call was not executed because another tool call in the same "
+    "batch failed argument validation. Re-issue all tool calls together with "
+    "corrected arguments."
+)
+class ToolArgsValidationMiddleware(AgentMiddleware):
+    """Validate tool-call arguments against each tool's schema, with retry.
+    Parameters
+    ----------
+    tools:
+        Optional explicit tool list. If omitted (the default), schemas are
+        resolved lazily from ``request.tools`` on each call and cached by the
+        set of tool names — so dynamic toolsets (tools added/removed by other
+        middleware) stay correct rather than going stale. Plain ``dict``
+        entries in ``request.tools`` are ignored (they pass through
+        unvalidated).
+    max_retries:
+        Number of validation-retry cycles per model invocation (default ``2``).
+        Up to ``max_retries + 1`` model calls may be made. Must be ``>= 0``.
+    strip_empty_values:
+        If ``True`` (default), recursively remove keys whose value is ``None``,
+        ``{}`` or ``[]`` before validation. The cleaned args are written back
+        onto the tool call, so they are also what the tool executes. See
+        :mod:`._strip` for the write-back contract and its caveats.
+    strip_placeholder_strings:
+        If ``True``, also strip string values that look like empty placeholders
+        (e.g. ``"null"``, ``"none"``). **Off by default** because tokens like
+        ``"NA"`` are legitimate values. Combine with ``placeholder_strings`` to
+        control the set. Has no effect unless ``strip_empty_values`` is ``True``.
+    placeholder_strings:
+        The set used when ``strip_placeholder_strings`` is enabled. Defaults to
+        a conservative built-in set.
+    json_schema_validator_class:
+        Validator class for dict-schema (MCP) tools. ``None`` (default) lazily
+        imports ``jsonschema.Draft7Validator``.
+    extra_validators:
+        Optional extra per-tool-call checks (see :data:`ExtraValidator`).
+    on_failure:
+        What to do after retries are exhausted with the args still invalid:
+        ``"pass"`` (default) returns the last response unchanged (fail open —
+        downstream tool error handling takes over); ``"raise"`` raises
+        :class:`ToolArgsValidationError`.
+    Raises
+    ------
+    ValueError
+        If ``max_retries`` is negative.
+    """
+    def __init__(
+        self,
+        *,
+        tools: list[BaseTool] | None = None,
+        max_retries: int = 2,
+        strip_empty_values: bool = True,
+        strip_placeholder_strings: bool = False,
+        placeholder_strings: frozenset[str] = DEFAULT_PLACEHOLDER_STRINGS,
+        json_schema_validator_class: type | None = None,
+        extra_validators: list[ExtraValidator] | None = None,
+        on_failure: OnFailure = "pass",
+    ) -> None:
+        # A negative value would make the retry loop run zero times and report
+        # "retries exhausted" without ever validating the first response.
+        if max_retries < 0:
+            raise ValueError(f"max_retries must be >= 0, got {max_retries}")
+        super().__init__()
+        self._max_retries = max_retries
+        self._strip_empty_values = strip_empty_values
+        # ``None`` disables string stripping inside ``strip_empty``.
+        self._placeholder_strings = (
+            placeholder_strings if strip_placeholder_strings else None
+        )
+        self._json_schema_validator_class = json_schema_validator_class
+        self._extra_validators = extra_validators or []
+        self._on_failure = on_failure
+        # Cache of {frozenset(tool names) -> {tool name -> ToolValidator}}.
+        self._cache: dict[frozenset[str], dict[str, ToolValidator]] = {}
+        # Validators for an explicit tool list are built once, up front.
+        self._explicit: dict[str, ToolValidator] | None = (
+            resolve_validators(
+                tools, json_schema_validator_class=json_schema_validator_class
+            )
+            if tools is not None
+            else None
+        )
+    # ------------------------------------------------------------------ #
+    # Hooks
+    # ------------------------------------------------------------------ #
+    def wrap_model_call(
+        self,
+        request: ModelRequest[Any],
+        handler: Callable[[ModelRequest[Any]], ModelResponse[Any]],
+    ) -> ModelResponse[Any]:
+        """Synchronous hook: call the model, validate, and retry on bad args."""
+        validators = self._resolve(request)
+        if not validators:
+            # Nothing to validate against: behave as a transparent pass-through.
+            return handler(request)
+        # Drive the shared validate/retry generator with a synchronous handler.
+        loop = self._validate_loop(validators, request, handler(request))
+        try:
+            retry_request = next(loop)
+            while True:
+                retry_request = loop.send(handler(retry_request))
+        except StopIteration as stop:
+            # The generator's return value is the response to surface.
+            return cast(ModelResponse[Any], stop.value)
+    async def awrap_model_call(
+        self,
+        request: ModelRequest[Any],
+        handler: Callable[[ModelRequest[Any]], Awaitable[ModelResponse[Any]]],
+    ) -> ModelResponse[Any]:
+        """Asynchronous hook: same contract as :meth:`wrap_model_call`."""
+        validators = self._resolve(request)
+        if not validators:
+            return await handler(request)
+        # Same generator, driven with an async handler.
+        loop = self._validate_loop(validators, request, await handler(request))
+        try:
+            retry_request = next(loop)
+            while True:
+                retry_request = loop.send(await handler(retry_request))
+        except StopIteration as stop:
+            return cast(ModelResponse[Any], stop.value)
+    # ------------------------------------------------------------------ #
+    # Core logic (single source of truth, shared by sync + async)
+    # ------------------------------------------------------------------ #
+    def _validate_loop(
+        self,
+        validators: dict[str, ToolValidator],
+        request: ModelRequest[Any],
+        first_response: ModelResponse[Any],
+    ) -> Generator[ModelRequest[Any], ModelResponse[Any], ModelResponse[Any]]:
+        """Validate-and-retry as a sans-I/O generator.
+        Yields the request to re-run and receives the resulting response back via
+        ``send``; returns the response to surface to the model node. Every
+        response — including the one from the final retry — is validated before
+        deciding whether retries are exhausted.
+        """
+        convo: list[AnyMessage] = list(request.messages)
+        response = first_response
+        for attempt in range(self._max_retries + 1):
+            ai_msg, errors = self._check(validators, response)
+            if not errors:
+                return response
+            if attempt == self._max_retries:
+                return self._exhausted(response)
+            # errors is non-empty only after validating tool calls on an AIMessage.
+            assert ai_msg is not None
+            self._log_retry(attempt + 1)
+            # Failed turns accumulate so the model sees its earlier mistakes.
+            convo = [*convo, ai_msg, *errors]
+            response = yield request.override(messages=convo)
+        return self._exhausted(response)  # unreachable; keeps types total
+    def _resolve(self, request: ModelRequest[Any]) -> dict[str, ToolValidator]:
+        """Return the validators to use for *request*.
+        An explicit tool list given at construction always wins. Otherwise
+        validators are built from ``request.tools`` and cached per set of tool
+        names.
+        """
+        if self._explicit is not None:
+            return self._explicit
+        # ``request.tools`` may mix tool objects with plain dict tool specs.
+        # Dicts have no ``.name`` attribute, so they are skipped here and pass
+        # through unvalidated instead of raising AttributeError.
+        tools: list[BaseTool] = [
+            t
+            for t in (getattr(request, "tools", None) or [])
+            if not isinstance(t, dict)
+        ]
+        key = frozenset(t.name for t in tools)
+        cached = self._cache.get(key)
+        if cached is None:
+            cached = resolve_validators(
+                tools, json_schema_validator_class=self._json_schema_validator_class
+            )
+            self._cache[key] = cached
+        return cached
+    def _check(
+        self, validators: dict[str, ToolValidator], response: ModelResponse[Any]
+    ) -> tuple[AIMessage | None, list[ToolMessage]]:
+        """Validate the response's tool calls.
+        Returns ``(ai_msg, error_messages)``. ``error_messages`` is empty when
+        there is nothing to retry (no AI message, no tool calls, or all valid).
+        """
+        ai_msg = _get_ai_message(response)
+        if ai_msg is None or not ai_msg.tool_calls:
+            return ai_msg, []
+        return ai_msg, self._validate_tool_calls(validators, ai_msg)
+    def _validate_tool_calls(
+        self, validators: dict[str, ToolValidator], ai_msg: AIMessage
+    ) -> list[ToolMessage]:
+        """Validate every tool call in *ai_msg*; return error ``ToolMessage``\\s.
+        Batch contract: if *any* tool call is invalid, *every* tool call in the
+        message gets a ``ToolMessage`` (errors for the bad ones, a "not executed"
+        notice for the good ones). Providers require each ``tool_call`` to have a
+        matching response, and the good calls have not actually run yet (we are
+        inside the model node), so they cannot get real results. Returns an empty
+        list only when all tool calls are valid.
+        """
+        error_msgs: list[ToolMessage] = []
+        valid_ids: list[str] = []
+        for tc in ai_msg.tool_calls:
+            name = tc.get("name") or ""
+            call_id = tc.get("id") or ""
+            args = tc.get("args") or {}
+            if self._strip_empty_values:
+                args = strip_empty(args, placeholder_strings=self._placeholder_strings)
+                tc["args"] = args  # write-back: cleaned args are what executes
+            errors = self._errors_for_call(validators, name, args)
+            if errors:
+                error_msgs.append(ToolMessage(content=errors, tool_call_id=call_id))
+            else:
+                valid_ids.append(call_id)
+        if not error_msgs:
+            return []
+        # Every sibling tool call needs a response too (provider requirement).
+        error_msgs.extend(
+            ToolMessage(content=_BATCH_SIBLING_NOTICE, tool_call_id=cid)
+            for cid in valid_ids
+        )
+        return error_msgs
+    def _errors_for_call(
+        self, validators: dict[str, ToolValidator], name: str, args: dict[str, Any]
+    ) -> str:
+        """Return a formatted error string for one tool call, or ``""`` if valid.
+        Tools without a validator skip schema validation but still run the
+        ``extra_validators`` — those may carry rules that don't depend on a
+        registered schema. Schema issues and extra findings are combined into
+        one message, and every failure is logged as a warning.
+        """
+        validator = validators.get(name)
+        issues = validator.validate(args) if validator is not None else []
+        extra = self._run_extra_validators(name, args)
+        if not issues and not extra:
+            return ""
+        parts: list[str] = []
+        if issues:
+            parts.append(format_issues(name, issues))
+        parts.extend(extra)
+        logger.warning("Validation failed for tool '%s': %s", name, "; ".join(parts))
+        return "\n".join(parts)
+    def _run_extra_validators(self, name: str, args: dict[str, Any]) -> list[str]:
+        """Run every configured extra validator and concatenate their errors."""
+        out: list[str] = []
+        for check in self._extra_validators:
+            out.extend(check(name, args))
+        return out
+    def _log_retry(self, attempt: int) -> None:
+        """Log that retry number *attempt* (1-based) is about to be made."""
+        logger.warning(
+            "Tool-arg validation failed (attempt %d/%d); re-invoking model",
+            attempt,
+            self._max_retries,
+        )
+    def _exhausted(self, response: ModelResponse[Any]) -> ModelResponse[Any]:
+        """Apply the ``on_failure`` policy once retries have run out.
+        Raises :class:`ToolArgsValidationError` for ``"raise"``; otherwise logs
+        a warning and returns *response* unchanged (fail open).
+        """
+        if self._on_failure == "raise":
+            raise ToolArgsValidationError(
+                f"Tool-call arguments still invalid after {self._max_retries} "
+                "validation retries."
+            )
+        logger.warning(
+            "Tool-arg validation retries exhausted (%d); passing response through",
+            self._max_retries,
+        )
+        return response
+def _get_ai_message(response: ModelResponse[Any]) -> AIMessage | None:
+    """Return the first ``AIMessage`` in ``response.result``, or ``None``.
+    A missing or empty ``result`` attribute also yields ``None``.
+    """
+    messages = getattr(response, "result", None) or []
+    return next((m for m in messages if isinstance(m, AIMessage)), None)

langchain_tool_args_validation_middleware/py.typed ADDED Viewed

File without changes

langchain_tool_args_validation_middleware-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,155 @@
+Metadata-Version: 2.4
+Name: langchain-tool-args-validation-middleware
+Version: 0.1.0
+Summary: LangChain agent middleware that validates LLM-generated tool-call arguments against each tool's schema before tool execution / HITL.
+Project-URL: Homepage, https://github.com/Serjbory/langchain-tool-args-validation-middleware
+Project-URL: Repository, https://github.com/Serjbory/langchain-tool-args-validation-middleware
+Author: Serj
+License-Expression: MIT
+License-File: LICENSE
+Keywords: agents,langchain,mcp,middleware,tools,validation
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Software Development :: Libraries
+Requires-Python: >=3.10
+Requires-Dist: langchain-core>=0.3.0
+Requires-Dist: langchain>=1.0.0
+Requires-Dist: pydantic>=2.0
+Provides-Extra: dev
+Requires-Dist: jsonschema>=4.0; extra == 'dev'
+Requires-Dist: mypy; extra == 'dev'
+Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
+Requires-Dist: pytest-cov>=5.0; extra == 'dev'
+Requires-Dist: pytest>=8.0; extra == 'dev'
+Requires-Dist: ruff; extra == 'dev'
+Provides-Extra: jsonschema
+Requires-Dist: jsonschema>=4.0; extra == 'jsonschema'
+Provides-Extra: test
+Requires-Dist: jsonschema>=4.0; extra == 'test'
+Requires-Dist: pytest-asyncio>=0.23; extra == 'test'
+Requires-Dist: pytest-cov>=5.0; extra == 'test'
+Requires-Dist: pytest>=8.0; extra == 'test'
+Description-Content-Type: text/markdown
+# langchain-tool-args-validation-middleware
+A LangChain agent middleware that validates LLM-generated **tool-call arguments**
+against each tool's schema **before** the tool runs (and before any
+human-in-the-loop approval step). When arguments are invalid it appends error
+`ToolMessage`s and re-invokes the model so it can self-correct — all inside the
+model node, so only the final `AIMessage` ever enters the graph state (it may
+still be invalid if retries are exhausted and `on_failure="pass"`; see Fail-open).
+```bash
+pip install langchain-tool-args-validation-middleware            # Pydantic tools only
+pip install "langchain-tool-args-validation-middleware[jsonschema]"  # + MCP / dict-schema tools
+```
+## Why
+LLMs frequently emit malformed tool calls: missing required fields, wrong types,
+hallucinated empty values, or extra keys. Without validation those reach the
+tool node and cause runtime errors or silent corruption — and in
+human-in-the-loop workflows, a human is asked to approve obviously-broken
+arguments. Catching this at the model boundary lets the agent fix itself in one
+extra model call instead of a full agent-loop iteration.
+It complements, rather than replaces, `ToolRetryMiddleware` (retries on tool
+*exceptions*) and `ModelRetryMiddleware` (retries on model *exceptions*): this
+one retries on *schema violations*, before execution.
+![Trace showing the middleware catching an invalid tool call and prompting the model to self-correct](https://raw.githubusercontent.com/Serjbory/langchain-tool-args-validation-middleware/main/docs/images/trace-example.jpg)
+*A trace of `create_oos_alert`: the model emitted arguments that violate the
+schema, the middleware rejected them with a precise error and a corrective hint,
+and the model retried — all inside the model node, before the tool ran.*
+## Usage
+```python
+from langchain.agents import create_agent
+from langchain_tool_args_validation_middleware import ToolArgsValidationMiddleware
+agent = create_agent(
+    model,
+    tools=tools,
+    middleware=[ToolArgsValidationMiddleware()],  # resolves schemas from the agent's tools
+)
+```
+Both validation paths are supported automatically:
+- **Pydantic tools** (`@tool`, or any tool with a `BaseModel` `args_schema`) →
+  validated with `BaseModel.model_validate`.
+- **MCP / dict-schema tools** (`args_schema` is a raw JSON Schema `dict`) →
+  validated with `jsonschema` (soft dependency, `Draft7Validator` by default).
+Unknown tools (no resolvable schema) pass through unvalidated.
+## Configuration
+| Parameter | Default | Description |
+|---|---|---|
+| `tools` | `None` | Explicit tool list. If omitted, schemas are resolved lazily from `request.tools` and cached by tool-name set (handles dynamic toolsets). |
+| `max_retries` | `2` | Validation-retry cycles per model invocation (up to `max_retries + 1` model calls). |
+| `strip_empty_values` | `True` | Recursively drop `None` / `{}` / `[]` before validation; the cleaned args are written back onto the tool call, so they are also what the tool executes. |
+| `strip_placeholder_strings` | `False` | Also drop placeholder strings like `"null"`. Off by default — see below. |
+| `placeholder_strings` | conservative set | Set used when string stripping is enabled. |
+| `json_schema_validator_class` | `None` | Override the JSON Schema validator class. `None` → lazy `Draft7Validator`. |
+| `extra_validators` | `None` | Extra `(name, args) -> list[str]` checks for domain rules. |
+| `on_failure` | `"pass"` | After retries are exhausted: `"pass"` (fail open) or `"raise"`. |
+## Design decisions for the two thorniest cases
+### Batch (partial) failure
+Providers (Anthropic, Gemini, OpenAI) require that **every** `tool_call` in an
+assistant message receive a matching `ToolMessage` before the next turn. So when
+a multi-call turn has *any* invalid call, the middleware emits:
+- an **error** `ToolMessage` for each invalid call, and
+- a **"not executed"** notice for each *valid* sibling call (it hasn't run yet —
+  we're still inside the model node — so it can't have a real result), asking the
+  model to re-issue the whole batch with corrected arguments.
+The failed `AIMessage` is placed before these `ToolMessage`s, and failed turns
+accumulate across retries so the model sees its repeated mistakes.
+### `strip_empty_values` and the write-back contract
+LLMs (Gemini especially) emit explicit `null`/`{}`/`[]` for optional fields
+instead of omitting them, causing needless validation failures. When stripping
+is on, the **cleaned arguments replace the originals on the tool call**, so what
+we validate is exactly what executes — no soundness gap between validation and
+execution.
+The trade-off: stripping a value that is *meaningfully empty* (e.g. `tags: []`
+meaning "clear all tags", or `null` meaning "explicitly unset") changes
+behaviour. Container stripping (`None`/`{}`/`[]`) is on by default because it's
+usually safe. **String-placeholder stripping is opt-in only** — tokens like
+`"NA"` (Namibia's ISO code) are legitimate values and must never be dropped
+silently. Enable it deliberately with `strip_placeholder_strings=True` and a set
+you control.
+### Fail-open
+After `max_retries`, the default `on_failure="pass"` returns the last response
+unchanged — the (still-invalid) args reach the tool node, where normal tool
+error handling takes over. This makes the middleware best-effort
+self-correction, not a hard guarantee. Use `on_failure="raise"` if you'd rather
+surface a `ToolArgsValidationError`.
+## Extra validators
+Plug in domain rules without touching core behaviour. A bundled example flags
+LangChain internal message IDs (`lc_<uuid>`) that LLMs sometimes mistake for
+real data identifiers:
+```python
+from langchain_tool_args_validation_middleware import detect_langchain_internal_ids
+ToolArgsValidationMiddleware(extra_validators=[detect_langchain_internal_ids])
+```
+## License
+MIT

langchain_tool_args_validation_middleware-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,10 @@
+langchain_tool_args_validation_middleware/__init__.py,sha256=9RJcLk_PWeOVdsZg-qXm0b4saxOcfb62JeCetxpj3ws,621
+langchain_tool_args_validation_middleware/_strip.py,sha256=CcIqt0K73x4jYjMeHg7YHwuicek39UcFMFD19uzEGho,3285
+langchain_tool_args_validation_middleware/_validation.py,sha256=ylL2CD-6STAjVC50X2yZ8Q-9TmJhEiHeWF11Rlgzlcg,4829
+langchain_tool_args_validation_middleware/extras.py,sha256=FAM_XyFLbZQ-5WPxid0k6BE57lJSRy-SIZxG1sQHTfg,1629
+langchain_tool_args_validation_middleware/middleware.py,sha256=ezcKaStT3lvChXSHdnR_K3yWUsEgsI5P7s5NDI2cGBI,13129
+langchain_tool_args_validation_middleware/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+langchain_tool_args_validation_middleware-0.1.0.dist-info/METADATA,sha256=6wE1povvfpTFqOLUwMl5rE34nTRvsINAiBV7e9_jv4U,7166
+langchain_tool_args_validation_middleware-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+langchain_tool_args_validation_middleware-0.1.0.dist-info/licenses/LICENSE,sha256=-qbRwFG05BhSnZR2O8BvzMqyUjiU_lIMPnyp1pUuvms,1061
+langchain_tool_args_validation_middleware-0.1.0.dist-info/RECORD,,

langchain_tool_args_validation_middleware-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

langchain_tool_args_validation_middleware-0.1.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Serj
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.