langchain-tool-args-validation-middleware 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19) hide show
  1. langchain_tool_args_validation_middleware-0.1.0/.github/workflows/publish.yml +85 -0
  2. langchain_tool_args_validation_middleware-0.1.0/.gitignore +11 -0
  3. langchain_tool_args_validation_middleware-0.1.0/LICENSE +21 -0
  4. langchain_tool_args_validation_middleware-0.1.0/PKG-INFO +155 -0
  5. langchain_tool_args_validation_middleware-0.1.0/README.md +122 -0
  6. langchain_tool_args_validation_middleware-0.1.0/docs/images/trace-example.jpg +0 -0
  7. langchain_tool_args_validation_middleware-0.1.0/justfile +72 -0
  8. langchain_tool_args_validation_middleware-0.1.0/pyproject.toml +69 -0
  9. langchain_tool_args_validation_middleware-0.1.0/src/langchain_tool_args_validation_middleware/__init__.py +24 -0
  10. langchain_tool_args_validation_middleware-0.1.0/src/langchain_tool_args_validation_middleware/_strip.py +82 -0
  11. langchain_tool_args_validation_middleware-0.1.0/src/langchain_tool_args_validation_middleware/_validation.py +140 -0
  12. langchain_tool_args_validation_middleware-0.1.0/src/langchain_tool_args_validation_middleware/extras.py +44 -0
  13. langchain_tool_args_validation_middleware-0.1.0/src/langchain_tool_args_validation_middleware/middleware.py +318 -0
  14. langchain_tool_args_validation_middleware-0.1.0/src/langchain_tool_args_validation_middleware/py.typed +0 -0
  15. langchain_tool_args_validation_middleware-0.1.0/tests/conftest.py +41 -0
  16. langchain_tool_args_validation_middleware-0.1.0/tests/test_middleware.py +287 -0
  17. langchain_tool_args_validation_middleware-0.1.0/tests/test_strip.py +45 -0
  18. langchain_tool_args_validation_middleware-0.1.0/tests/test_validation.py +64 -0
  19. langchain_tool_args_validation_middleware-0.1.0/uv.lock +2029 -0
@@ -0,0 +1,85 @@
1
+ name: Publish
2
+
3
+ # Publish a new release whenever a version tag (e.g. v0.2.0) is pushed.
4
+ # Flow: build once -> publish to TestPyPI -> publish to PyPI.
5
+ # Authentication uses PyPI Trusted Publishing (OIDC); no API tokens/secrets.
6
+
7
+ on:
8
+ push:
9
+ tags:
10
+ - "v*"
11
+
12
+ # No permissions by default; each job opts into exactly what it needs.
13
+ permissions: {}
14
+
15
+ jobs:
16
+ build:
17
+ name: Build distributions
18
+ runs-on: ubuntu-latest
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+
22
+ - name: Install uv
23
+ uses: astral-sh/setup-uv@v6
24
+
25
+ - name: Verify tag matches package version
26
+ run: |
27
+ tag="${GITHUB_REF_NAME#v}"
28
+ pkg="$(uv version --short --no-sync)"
29
+ echo "tag=$tag pyproject=$pkg"
30
+ if [ "$tag" != "$pkg" ]; then
31
+ echo "::error::Tag $GITHUB_REF_NAME does not match pyproject version $pkg. Bump the version before tagging."
32
+ exit 1
33
+ fi
34
+
35
+ - name: Build sdist and wheel
36
+ run: uv build
37
+
38
+ - name: Upload dist artifact
39
+ uses: actions/upload-artifact@v4
40
+ with:
41
+ name: dist
42
+ path: dist/
43
+
44
+ testpypi:
45
+ name: Publish to TestPyPI
46
+ needs: build
47
+ runs-on: ubuntu-latest
48
+ environment: testpypi
49
+ permissions:
50
+ id-token: write # required for trusted publishing
51
+ steps:
52
+ - name: Download dist artifact
53
+ uses: actions/download-artifact@v4
54
+ with:
55
+ name: dist
56
+ path: dist/
57
+
58
+ - name: Install uv
59
+ uses: astral-sh/setup-uv@v6
60
+
61
+ - name: Publish to TestPyPI
62
+ run: >-
63
+ uv publish
64
+ --trusted-publishing always
65
+ --publish-url https://test.pypi.org/legacy/
66
+
67
+ pypi:
68
+ name: Publish to PyPI
69
+ needs: testpypi
70
+ runs-on: ubuntu-latest
71
+ environment: pypi
72
+ permissions:
73
+ id-token: write # required for trusted publishing
74
+ steps:
75
+ - name: Download dist artifact
76
+ uses: actions/download-artifact@v4
77
+ with:
78
+ name: dist
79
+ path: dist/
80
+
81
+ - name: Install uv
82
+ uses: astral-sh/setup-uv@v6
83
+
84
+ - name: Publish to PyPI
85
+ run: uv publish --trusted-publishing always
@@ -0,0 +1,11 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .pytest_cache/
6
+ .mypy_cache/
7
+ .ruff_cache/
8
+ .coverage
9
+ htmlcov/
10
+ dist/
11
+ build/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Serj
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,155 @@
1
+ Metadata-Version: 2.4
2
+ Name: langchain-tool-args-validation-middleware
3
+ Version: 0.1.0
4
+ Summary: LangChain agent middleware that validates LLM-generated tool-call arguments against each tool's schema before tool execution / HITL.
5
+ Project-URL: Homepage, https://github.com/Serjbory/langchain-tool-args-validation-middleware
6
+ Project-URL: Repository, https://github.com/Serjbory/langchain-tool-args-validation-middleware
7
+ Author: Serj
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: agents,langchain,mcp,middleware,tools,validation
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Topic :: Software Development :: Libraries
14
+ Requires-Python: >=3.10
15
+ Requires-Dist: langchain-core>=0.3.0
16
+ Requires-Dist: langchain>=1.0.0
17
+ Requires-Dist: pydantic>=2.0
18
+ Provides-Extra: dev
19
+ Requires-Dist: jsonschema>=4.0; extra == 'dev'
20
+ Requires-Dist: mypy; extra == 'dev'
21
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
22
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
23
+ Requires-Dist: pytest>=8.0; extra == 'dev'
24
+ Requires-Dist: ruff; extra == 'dev'
25
+ Provides-Extra: jsonschema
26
+ Requires-Dist: jsonschema>=4.0; extra == 'jsonschema'
27
+ Provides-Extra: test
28
+ Requires-Dist: jsonschema>=4.0; extra == 'test'
29
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'test'
30
+ Requires-Dist: pytest-cov>=5.0; extra == 'test'
31
+ Requires-Dist: pytest>=8.0; extra == 'test'
32
+ Description-Content-Type: text/markdown
33
+
34
+ # langchain-tool-args-validation-middleware
35
+
36
+ A LangChain agent middleware that validates LLM-generated **tool-call arguments**
37
+ against each tool's schema **before** the tool runs (and before any
38
+ human-in-the-loop approval step). When arguments are invalid it appends error
39
+ `ToolMessage`s and re-invokes the model so it can self-correct — all inside the
40
+ model node, so only the final valid `AIMessage` ever enters the graph state.
41
+
42
+ ```bash
43
+ pip install langchain-tool-args-validation-middleware # Pydantic tools only
44
+ pip install "langchain-tool-args-validation-middleware[jsonschema]" # + MCP / dict-schema tools
45
+ ```
46
+
47
+ ## Why
48
+
49
+ LLMs frequently emit malformed tool calls: missing required fields, wrong types,
50
+ hallucinated empty values, or extra keys. Without validation those reach the
51
+ tool node and cause runtime errors or silent corruption — and in
52
+ human-in-the-loop workflows, a human is asked to approve obviously-broken
53
+ arguments. Catching this at the model boundary lets the agent fix itself in one
54
+ extra model call instead of a full agent-loop iteration.
55
+
56
+ It complements, rather than replaces, `ToolRetryMiddleware` (retries on tool
57
+ *exceptions*) and `ModelRetryMiddleware` (retries on model *exceptions*): this
58
+ one retries on *schema violations*, before execution.
59
+
60
+ ![Trace showing the middleware catching an invalid tool call and prompting the model to self-correct](https://raw.githubusercontent.com/Serjbory/langchain-tool-args-validation-middleware/main/docs/images/trace-example.jpg)
61
+
62
+ *A trace of `create_oos_alert`: the model emitted arguments that violate the
63
+ schema, the middleware rejected them with a precise error and a corrective hint,
64
+ and the model retried — all inside the model node, before the tool ran.*
65
+
66
+ ## Usage
67
+
68
+ ```python
69
+ from langchain.agents import create_agent
70
+ from langchain_tool_args_validation_middleware import ToolArgsValidationMiddleware
71
+
72
+ agent = create_agent(
73
+ model,
74
+ tools=tools,
75
+ middleware=[ToolArgsValidationMiddleware()], # resolves schemas from the agent's tools
76
+ )
77
+ ```
78
+
79
+ Both validation paths are supported automatically:
80
+
81
+ - **Pydantic tools** (`@tool`, or any tool with a `BaseModel` `args_schema`) →
82
+ validated with `BaseModel.model_validate`.
83
+ - **MCP / dict-schema tools** (`args_schema` is a raw JSON Schema `dict`) →
84
+ validated with `jsonschema` (soft dependency, `Draft7Validator` by default).
85
+
86
+ Unknown tools (no resolvable schema) pass through unvalidated.
87
+
88
+ ## Configuration
89
+
90
+ | Parameter | Default | Description |
91
+ |---|---|---|
92
+ | `tools` | `None` | Explicit tool list. If omitted, schemas are resolved lazily from `request.tools` and cached by tool-name set (handles dynamic toolsets). |
93
+ | `max_retries` | `2` | Validation-retry cycles per model invocation (up to `max_retries + 1` model calls). |
94
+ | `strip_empty_values` | `True` | Recursively drop `None` / `{}` / `[]` before validation. |
95
+ | `strip_placeholder_strings` | `False` | Also drop placeholder strings like `"null"`. Off by default — see below. |
96
+ | `placeholder_strings` | conservative set | Set used when string stripping is enabled. |
97
+ | `json_schema_validator_class` | `None` | Override the JSON Schema validator class. `None` → lazy `Draft7Validator`. |
98
+ | `extra_validators` | `None` | Extra `(name, args) -> list[str]` checks for domain rules. |
99
+ | `on_failure` | `"pass"` | After retries are exhausted: `"pass"` (fail open) or `"raise"`. |
100
+
101
+ ## Design decisions for the thorniest cases
102
+
103
+ ### Batch (partial) failure
104
+
105
+ Providers (Anthropic, Gemini, OpenAI) require that **every** `tool_call` in an
106
+ assistant message receive a matching `ToolMessage` before the next turn. So when
107
+ a multi-call turn has *any* invalid call, the middleware emits:
108
+
109
+ - an **error** `ToolMessage` for each invalid call, and
110
+ - a **"not executed"** notice for each *valid* sibling call (it hasn't run yet —
111
+ we're still inside the model node — so it can't have a real result), asking the
112
+ model to re-issue the whole batch with corrected arguments.
113
+
114
+ The failed `AIMessage` is placed before these `ToolMessage`s, and failed turns
115
+ accumulate across retries so the model sees its repeated mistakes.
116
+
117
+ ### `strip_empty_values` and the write-back contract
118
+
119
+ LLMs (Gemini especially) emit explicit `null`/`{}`/`[]` for optional fields
120
+ instead of omitting them, causing needless validation failures. When stripping
121
+ is on, the **cleaned arguments replace the originals on the tool call**, so what
122
+ we validate is exactly what executes — no soundness gap between validation and
123
+ execution.
124
+
125
+ The trade-off: stripping a value that is *meaningfully empty* (e.g. `tags: []`
126
+ meaning "clear all tags", or `null` meaning "explicitly unset") changes
127
+ behaviour. Container stripping (`None`/`{}`/`[]`) is on by default because it's
128
+ usually safe. **String-placeholder stripping is opt-in only** — tokens like
129
+ `"NA"` (Namibia's ISO code) are legitimate values and must never be dropped
130
+ silently. Enable it deliberately with `strip_placeholder_strings=True` and a set
131
+ you control.
132
+
133
+ ### Fail-open
134
+
135
+ Once all `max_retries` retries are exhausted, the default `on_failure="pass"` returns the last response
136
+ unchanged — the (still-invalid) args reach the tool node, where normal tool
137
+ error handling takes over. This makes the middleware best-effort
138
+ self-correction, not a hard guarantee. Use `on_failure="raise"` if you'd rather
139
+ surface a `ToolArgsValidationError`.
140
+
141
+ ## Extra validators
142
+
143
+ Plug in domain rules without touching core behaviour. A bundled example flags
144
+ LangChain internal message IDs (`lc_<uuid>`) that LLMs sometimes mistake for
145
+ real data identifiers:
146
+
147
+ ```python
148
+ from langchain_tool_args_validation_middleware import detect_langchain_internal_ids
149
+
150
+ ToolArgsValidationMiddleware(extra_validators=[detect_langchain_internal_ids])
151
+ ```
152
+
153
+ ## License
154
+
155
+ MIT
@@ -0,0 +1,122 @@
1
+ # langchain-tool-args-validation-middleware
2
+
3
+ A LangChain agent middleware that validates LLM-generated **tool-call arguments**
4
+ against each tool's schema **before** the tool runs (and before any
5
+ human-in-the-loop approval step). When arguments are invalid it appends error
6
+ `ToolMessage`s and re-invokes the model so it can self-correct — all inside the
7
+ model node, so only the final valid `AIMessage` ever enters the graph state.
8
+
9
+ ```bash
10
+ pip install langchain-tool-args-validation-middleware # Pydantic tools only
11
+ pip install "langchain-tool-args-validation-middleware[jsonschema]" # + MCP / dict-schema tools
12
+ ```
13
+
14
+ ## Why
15
+
16
+ LLMs frequently emit malformed tool calls: missing required fields, wrong types,
17
+ hallucinated empty values, or extra keys. Without validation those reach the
18
+ tool node and cause runtime errors or silent corruption — and in
19
+ human-in-the-loop workflows, a human is asked to approve obviously-broken
20
+ arguments. Catching this at the model boundary lets the agent fix itself in one
21
+ extra model call instead of a full agent-loop iteration.
22
+
23
+ It complements, rather than replaces, `ToolRetryMiddleware` (retries on tool
24
+ *exceptions*) and `ModelRetryMiddleware` (retries on model *exceptions*): this
25
+ one retries on *schema violations*, before execution.
26
+
27
+ ![Trace showing the middleware catching an invalid tool call and prompting the model to self-correct](https://raw.githubusercontent.com/Serjbory/langchain-tool-args-validation-middleware/main/docs/images/trace-example.jpg)
28
+
29
+ *A trace of `create_oos_alert`: the model emitted arguments that violate the
30
+ schema, the middleware rejected them with a precise error and a corrective hint,
31
+ and the model retried — all inside the model node, before the tool ran.*
32
+
33
+ ## Usage
34
+
35
+ ```python
36
+ from langchain.agents import create_agent
37
+ from langchain_tool_args_validation_middleware import ToolArgsValidationMiddleware
38
+
39
+ agent = create_agent(
40
+ model,
41
+ tools=tools,
42
+ middleware=[ToolArgsValidationMiddleware()], # resolves schemas from the agent's tools
43
+ )
44
+ ```
45
+
46
+ Both validation paths are supported automatically:
47
+
48
+ - **Pydantic tools** (`@tool`, or any tool with a `BaseModel` `args_schema`) →
49
+ validated with `BaseModel.model_validate`.
50
+ - **MCP / dict-schema tools** (`args_schema` is a raw JSON Schema `dict`) →
51
+ validated with `jsonschema` (soft dependency, `Draft7Validator` by default).
52
+
53
+ Unknown tools (no resolvable schema) pass through unvalidated.
54
+
55
+ ## Configuration
56
+
57
+ | Parameter | Default | Description |
58
+ |---|---|---|
59
+ | `tools` | `None` | Explicit tool list. If omitted, schemas are resolved lazily from `request.tools` and cached by tool-name set (handles dynamic toolsets). |
60
+ | `max_retries` | `2` | Validation-retry cycles per model invocation (up to `max_retries + 1` model calls). |
61
+ | `strip_empty_values` | `True` | Recursively drop `None` / `{}` / `[]` before validation. |
62
+ | `strip_placeholder_strings` | `False` | Also drop placeholder strings like `"null"`. Off by default — see below. |
63
+ | `placeholder_strings` | conservative set | Set used when string stripping is enabled. |
64
+ | `json_schema_validator_class` | `None` | Override the JSON Schema validator class. `None` → lazy `Draft7Validator`. |
65
+ | `extra_validators` | `None` | Extra `(name, args) -> list[str]` checks for domain rules. |
66
+ | `on_failure` | `"pass"` | After retries are exhausted: `"pass"` (fail open) or `"raise"`. |
67
+
68
+ ## Design decisions for the thorniest cases
69
+
70
+ ### Batch (partial) failure
71
+
72
+ Providers (Anthropic, Gemini, OpenAI) require that **every** `tool_call` in an
73
+ assistant message receive a matching `ToolMessage` before the next turn. So when
74
+ a multi-call turn has *any* invalid call, the middleware emits:
75
+
76
+ - an **error** `ToolMessage` for each invalid call, and
77
+ - a **"not executed"** notice for each *valid* sibling call (it hasn't run yet —
78
+ we're still inside the model node — so it can't have a real result), asking the
79
+ model to re-issue the whole batch with corrected arguments.
80
+
81
+ The failed `AIMessage` is placed before these `ToolMessage`s, and failed turns
82
+ accumulate across retries so the model sees its repeated mistakes.
83
+
84
+ ### `strip_empty_values` and the write-back contract
85
+
86
+ LLMs (Gemini especially) emit explicit `null`/`{}`/`[]` for optional fields
87
+ instead of omitting them, causing needless validation failures. When stripping
88
+ is on, the **cleaned arguments replace the originals on the tool call**, so what
89
+ we validate is exactly what executes — no soundness gap between validation and
90
+ execution.
91
+
92
+ The trade-off: stripping a value that is *meaningfully empty* (e.g. `tags: []`
93
+ meaning "clear all tags", or `null` meaning "explicitly unset") changes
94
+ behaviour. Container stripping (`None`/`{}`/`[]`) is on by default because it's
95
+ usually safe. **String-placeholder stripping is opt-in only** — tokens like
96
+ `"NA"` (Namibia's ISO code) are legitimate values and must never be dropped
97
+ silently. Enable it deliberately with `strip_placeholder_strings=True` and a set
98
+ you control.
99
+
100
+ ### Fail-open
101
+
102
+ Once all `max_retries` retries are exhausted, the default `on_failure="pass"` returns the last response
103
+ unchanged — the (still-invalid) args reach the tool node, where normal tool
104
+ error handling takes over. This makes the middleware best-effort
105
+ self-correction, not a hard guarantee. Use `on_failure="raise"` if you'd rather
106
+ surface a `ToolArgsValidationError`.
107
+
108
+ ## Extra validators
109
+
110
+ Plug in domain rules without touching core behaviour. A bundled example flags
111
+ LangChain internal message IDs (`lc_<uuid>`) that LLMs sometimes mistake for
112
+ real data identifiers:
113
+
114
+ ```python
115
+ from langchain_tool_args_validation_middleware import detect_langchain_internal_ids
116
+
117
+ ToolArgsValidationMiddleware(extra_validators=[detect_langchain_internal_ids])
118
+ ```
119
+
120
+ ## License
121
+
122
+ MIT
@@ -0,0 +1,72 @@
1
+ # Run with: just <task-name>
2
+ # Install just on mac: brew install just
3
+ # Install just if not present on linux (Debian based)
4
+ # curl --proto '=https' --tlsv1.2 -sSf https://just.systems/install.sh | bash -s -- --to /usr/local/bin
5
+ # To format justfile: just --fmt --unstable
6
+
7
+ # Variables
8
+ src_dir := "src/langchain_tool_args_validation_middleware"
9
+
10
+ # Default recipe to display available commands
11
+ default:
12
+ @just --list
13
+
14
+ # Sync dependencies (including dev extras)
15
+ sync:
16
+ uv sync --extra dev
17
+
18
+ # Auto-format code
19
+ format:
20
+ uv run --extra dev ruff check --select I --fix .
21
+ uv run --extra dev ruff format .
22
+
23
+ # Lint and format check
24
+ lint:
25
+ uv run --extra dev ruff format --check --diff .
26
+ uv run --extra dev ruff check --show-fixes .
27
+ uv run --extra dev mypy {{ src_dir }}
28
+
29
+ # Run unit tests (optional path to narrow scope, e.g. just test tests/test_validation.py)
30
+ test path="tests":
31
+ uv run --extra dev pytest --cov={{ src_dir }} {{ path }}
32
+
33
+ # Run format, lint, and tests to check everything before committing
34
+ pre-commit: format lint test
35
+
36
+ # Build sdist and wheel into dist/
37
+ build:
38
+ rm -rf dist
39
+ uv build
40
+
41
+ # Publish to TestPyPI (local: uses UV_PUBLISH_TOKEN / ~/.pypirc; CI uses trusted publishing)
42
+ publish-test: build
43
+ uv publish --publish-url https://test.pypi.org/legacy/
44
+
45
+ # Publish to PyPI
46
+ publish: build
47
+ uv publish
48
+
49
+ # Bump version (just bump patch|minor|major|rc), commit, and tag
50
+ bump level="patch":
51
+ uv version --bump {{ level }}
52
+ git commit -am "release: v$(uv version --short)"
53
+ git tag "v$(uv version --short)"
54
+ @echo "Pushed nothing yet. Run: git push && git push --tags"
55
+
56
+ # Clean up cache files
57
+ clean:
58
+ find . -type d -name "__pycache__" -exec rm -rf {} + && \
59
+ find . -type f -name "*.pyc" -delete && \
60
+ rm -rf .pytest_cache .coverage htmlcov .mypy_cache .ruff_cache
61
+
62
+ # Install just if not present on mac
63
+ install-just-on-mac:
64
+ @command -v just >/dev/null 2>&1 || { echo "Installing just..."; brew install just; }
65
+
66
+ # Install uv if not present
67
+ install-uv:
68
+ @command -v uv >/dev/null 2>&1 || { echo "Installing uv..."; curl -LsSf https://astral.sh/uv/install.sh | sh; }
69
+
70
+ install-tools:
71
+ uv tool install ruff==0.15.11 --force
72
+ uv tool install mypy==1.18.2 --force
@@ -0,0 +1,69 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "langchain-tool-args-validation-middleware"
7
+ version = "0.1.0"
8
+ description = "LangChain agent middleware that validates LLM-generated tool-call arguments against each tool's schema before tool execution / HITL."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "MIT"
12
+ authors = [{ name = "Serj" }]
13
+ keywords = ["langchain", "agents", "middleware", "tools", "validation", "mcp"]
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Topic :: Software Development :: Libraries",
18
+ ]
19
+
20
+ dependencies = [
21
+ "langchain>=1.0.0",
22
+ "langchain-core>=0.3.0",
23
+ "pydantic>=2.0",
24
+ ]
25
+
26
+ [project.optional-dependencies]
27
+ # jsonschema is a *soft* dependency: only needed when validating tools whose
28
+ # args_schema is a raw JSON Schema dict (e.g. MCP tools).
29
+ jsonschema = ["jsonschema>=4.0"]
30
+ test = [
31
+ "pytest>=8.0",
32
+ "pytest-asyncio>=0.23",
33
+ "pytest-cov>=5.0",
34
+ "jsonschema>=4.0",
35
+ ]
36
+ dev = [
37
+ "ruff",
38
+ "mypy",
39
+ "langchain-tool-args-validation-middleware[test]",
40
+ ]
41
+
42
+ [project.urls]
43
+ Homepage = "https://github.com/Serjbory/langchain-tool-args-validation-middleware"
44
+ Repository = "https://github.com/Serjbory/langchain-tool-args-validation-middleware"
45
+
46
+ [tool.hatch.build.targets.wheel]
47
+ packages = ["src/langchain_tool_args_validation_middleware"]
48
+
49
+ [tool.pytest.ini_options]
50
+ asyncio_mode = "auto"
51
+ testpaths = ["tests"]
52
+ pythonpath = ["."]
53
+
54
+ [tool.ruff]
55
+ line-length = 88
56
+ target-version = "py310"
57
+
58
+ [tool.ruff.lint]
59
+ select = ["E", "F", "I", "UP", "B", "SIM"]
60
+
61
+ [tool.mypy]
62
+ python_version = "3.10"
63
+ strict = true
64
+ warn_unused_ignores = true
65
+
66
+ # jsonschema is a soft dependency and ships no type stubs.
67
+ [[tool.mypy.overrides]]
68
+ module = ["jsonschema", "jsonschema.*"]
69
+ ignore_missing_imports = true
@@ -0,0 +1,24 @@
1
+ """Validate LLM tool-call arguments against each tool's schema before execution."""
2
+
3
+ from ._strip import DEFAULT_PLACEHOLDER_STRINGS, strip_empty
4
+ from ._validation import ValidationIssue
5
+ from .extras import detect_langchain_internal_ids
6
+ from .middleware import (
7
+ ExtraValidator,
8
+ OnFailure,
9
+ ToolArgsValidationError,
10
+ ToolArgsValidationMiddleware,
11
+ )
12
+
13
+ __all__ = [
14
+ "DEFAULT_PLACEHOLDER_STRINGS",
15
+ "ExtraValidator",
16
+ "OnFailure",
17
+ "ToolArgsValidationError",
18
+ "ToolArgsValidationMiddleware",
19
+ "ValidationIssue",
20
+ "detect_langchain_internal_ids",
21
+ "strip_empty",
22
+ ]
23
+
24
+ __version__ = "0.1.0"
@@ -0,0 +1,82 @@
1
+ """Recursive stripping of "empty" values from LLM-generated tool arguments.
2
+
3
+ LLMs (Gemini especially) routinely emit explicit ``null`` or empty containers
4
+ for optional fields instead of omitting them. Stripping these before validation
5
+ avoids unnecessary retries: an optional field simply becomes absent, and a
6
+ required field surfaces a clear ``'<field>' is a required property`` error.
7
+
8
+ Design note — write-back contract
9
+ ----------------------------------
10
+ When stripping is enabled the *cleaned* arguments replace the originals in the
11
+ tool call, so the cleaned version is what both validation **and tool execution**
12
+ see. This keeps "what we validated" and "what runs" identical (no soundness
13
+ gap), at the cost of mutating the model's output. That trade-off is the whole
14
+ point of stripping, but it means stripping a value that is *semantically
15
+ meaningful* (e.g. ``tags: []`` meaning "clear all tags", or ``null`` meaning
16
+ "explicitly unset") changes behaviour. Container stripping (``None``/``{}``/
17
+ ``[]``) is on by default; the far riskier string-placeholder stripping
18
+ (``"none"``, ``"N/A"``, ...) is **opt-in only**, because tokens like ``"NA"``
19
+ are legitimate values (Namibia's ISO code, "North America", ...).
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ from typing import Any
25
+
26
+ # A conservative, opt-in default set of placeholder strings. Deliberately
27
+ # excludes ambiguous real-world tokens like "na"/"nil". Callers may pass their
28
+ # own set instead. Only used when string stripping is explicitly enabled.
29
+ DEFAULT_PLACEHOLDER_STRINGS: frozenset[str] = frozenset(
30
+ {"none", "null", "undefined", '""', "''"}
31
+ )
32
+
33
+
34
def strip_empty(
    value: Any,
    *,
    placeholder_strings: frozenset[str] | None = None,
) -> Any:
    """Build a cleaned copy of *value* with "empty" members pruned recursively.

    Parameters
    ----------
    value:
        Usually a tool call's ``args`` dict, but any nesting of dicts and
        lists is accepted. Anything that is neither a dict nor a list is
        handed back unchanged (including a top-level ``None``).
    placeholder_strings:
        When given, a string member is pruned if its whitespace-stripped,
        lower-cased form belongs to this set. Leaving it as ``None`` turns
        string pruning off, so only ``None``/``{}``/``[]`` members go.

    Returns
    -------
    A new dict/list for container inputs; the input object itself otherwise.

    Notes
    -----
    The input is never modified; writing the result back onto the tool call
    is the caller's decision.

    Emptiness is judged on each member *before* it is cleaned. A container
    that only becomes empty because its own members were pruned is therefore
    kept: ``{"a": {"b": None}}`` yields ``{"a": {}}``.
    """
    if isinstance(value, list):
        kept: list[Any] = []
        for element in value:
            if _is_empty(element, placeholder_strings):
                continue
            kept.append(strip_empty(element, placeholder_strings=placeholder_strings))
        return kept
    if isinstance(value, dict):
        return {
            name: strip_empty(entry, placeholder_strings=placeholder_strings)
            for name, entry in value.items()
            if not _is_empty(entry, placeholder_strings)
        }
    # Scalars (and any other non-container object) pass straight through.
    return value


def _is_empty(value: Any, placeholder_strings: frozenset[str] | None) -> bool:
    """Decide whether *value* counts as "empty" and must be pruned.

    ``None``, ``{}`` and ``[]`` are always empty. A string is empty only when
    *placeholder_strings* is supplied and contains the string's
    whitespace-stripped, lower-cased form. Everything else (``0``, ``False``,
    ``""``, non-empty containers, ...) is kept.
    """
    if value is None or value == {} or value == []:
        return True
    # String pruning is opt-in: without a placeholder set nothing else is empty.
    if placeholder_strings is None or not isinstance(value, str):
        return False
    return value.strip().lower() in placeholder_strings