PyPI - langchain-content-normalizer - Versions diffs - 0.1.0__tar.gz - Mend

langchain-content-normalizer 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

langchain_content_normalizer-0.1.0/.github/ISSUE_TEMPLATE/bug_report.yml ADDED Viewed

@@ -0,0 +1,30 @@
+name: Bug report
+description: Report incorrect content normalization behavior.
+title: "[Bug]: "
+labels: [bug]
+body:
+  - type: textarea
+    id: input
+    attributes:
+      label: Input content shape
+      description: Paste the minimal content shape that fails.
+      render: python
+    validations:
+      required: true
+  - type: textarea
+    id: expected
+    attributes:
+      label: Expected output
+    validations:
+      required: true
+  - type: textarea
+    id: actual
+    attributes:
+      label: Actual output
+    validations:
+      required: true
+  - type: input
+    id: version
+    attributes:
+      label: Package version
+      placeholder: 0.1.0

langchain_content_normalizer-0.1.0/.github/ISSUE_TEMPLATE/feature_request.yml ADDED Viewed

@@ -0,0 +1,19 @@
+name: Feature request
+description: Suggest a new content shape, provider adapter, or option.
+title: "[Feature]: "
+labels: [enhancement]
+body:
+  - type: textarea
+    id: problem
+    attributes:
+      label: Problem
+      description: What content shape or workflow is not covered today?
+    validations:
+      required: true
+  - type: textarea
+    id: proposal
+    attributes:
+      label: Proposal
+      description: Describe the behavior you want.
+    validations:
+      required: true

langchain_content_normalizer-0.1.0/.github/dependabot.yml ADDED Viewed

@@ -0,0 +1,10 @@
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "monthly"
+  - package-ecosystem: "uv"
+    directory: "/"
+    schedule:
+      interval: "monthly"

langchain_content_normalizer-0.1.0/.github/pull_request_template.md ADDED Viewed

@@ -0,0 +1,9 @@
+## Summary
+## Checklist
+- [ ] Added or updated tests.
+- [ ] `uv run ruff check .` passes.
+- [ ] `uv run pytest` passes.
+- [ ] Runtime dependencies are still zero, or the dependency is justified.
+- [ ] Unknown non-empty content is not silently dropped.

langchain_content_normalizer-0.1.0/.github/workflows/build.yml ADDED Viewed

@@ -0,0 +1,27 @@
+name: Build
+on:
+  push:
+    branches: [main]
+    tags: ["v*"]
+  pull_request:
+    branches: [main]
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Build package
+        run: uv build
+      - name: Upload distributions
+        uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/*

langchain_content_normalizer-0.1.0/.github/workflows/ci.yml ADDED Viewed

@@ -0,0 +1,25 @@
+# Lint and test on every push / pull request targeting main.
+name: CI
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+# The job only reads the repository, so keep the workflow token read-only.
+permissions:
+  contents: read
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      # Report every failing interpreter instead of cancelling on the first failure.
+      fail-fast: false
+      matrix:
+        # Quoted so the versions stay strings; matches the Python classifiers in pyproject.toml.
+        python-version: ["3.11", "3.12", "3.13"]
+    env:
+      # Make uv create the project environment with the matrix interpreter.
+      UV_PYTHON: ${{ matrix.python-version }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: uv sync --dev
+      - name: Lint
+        run: uv run ruff check .
+      - name: Test
+        run: uv run pytest

langchain_content_normalizer-0.1.0/.github/workflows/publish.yml ADDED Viewed

@@ -0,0 +1,22 @@
+name: Publish to PyPI
+on:
+  workflow_dispatch:
+  release:
+    types: [published]
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    environment: pypi
+    permissions:
+      id-token: write
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+      - name: Build package
+        run: uv build
+      - name: Publish package
+        uses: pypa/gh-action-pypi-publish@release/v1

langchain_content_normalizer-0.1.0/.github/workflows/smoke.yml ADDED Viewed

@@ -0,0 +1,23 @@
+name: Smoke
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+jobs:
+  smoke:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install dependencies
+        run: uv sync --dev
+      - name: Run smoke test
+        run: uv run python scripts/smoke.py

langchain_content_normalizer-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,10 @@
+.venv/
+__pycache__/
+.pytest_cache/
+.ruff_cache/
+*.egg-info/
+dist/
+build/
+.coverage
+htmlcov/
+.DS_Store

langchain_content_normalizer-0.1.0/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,15 @@
+# Changelog
+## Unreleased
+- Added maintainer documentation, release process, examples, and issue/PR templates.
+- Added build and manual publish workflows.
+## 0.1.0
+Initial public release.
+- Added text normalization for strings, Anthropic blocks, MCP objects, tool results, and message-like wrappers.
+- Added tool output truncation helper.
+- Added provider-aware multimodal image payload builders.
+- Added tests and CI.

langchain_content_normalizer-0.1.0/CONTRIBUTING.md ADDED Viewed

@@ -0,0 +1,25 @@
+# Contributing
+Thanks for helping improve `langchain-content-normalizer`.
+## Development setup
+```bash
+uv sync --dev
+uv run ruff check .
+uv run pytest
+```
+## Contribution guidelines
+- Keep runtime dependencies at zero unless there is a strong reason.
+- Prefer duck typing over importing LangChain, MCP, or provider SDK classes.
+- Add tests for every new content shape or provider format.
+- Do not silently drop unknown non-empty content. Preserve it with a safe fallback.
+- Keep public APIs small and documented in `README.md`.
+## Useful PRs
+- New MCP or LangChain content fixtures.
+- Better multimodal provider adapters.
+- Strict-mode behavior for applications that prefer errors over fallback strings.

langchain_content_normalizer-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Benjamin Jornet
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

langchain_content_normalizer-0.1.0/MAINTAINERS.md ADDED Viewed

@@ -0,0 +1,20 @@
+# Maintainers
+## Primary maintainer
+- Benjamin Jornet (`@BenjaminJornet`)
+## Maintainer responsibilities
+- Review pull requests that add new content shapes or provider adapters.
+- Triage issues with reproducible input/output examples.
+- Keep CI, packaging, and release workflows working.
+- Preserve the zero-runtime-dependency contract unless a change is explicitly justified.
+- Publish releases and update `CHANGELOG.md`.
+## Review priorities
+1. Unknown non-empty content must not be silently dropped.
+2. Runtime dependencies should remain at zero.
+3. Tests must cover each new block shape.
+4. Public APIs should stay small and documented.

langchain_content_normalizer-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,131 @@
+Metadata-Version: 2.4
+Name: langchain-content-normalizer
+Version: 0.1.0
+Summary: Normalize LangChain, MCP, and multimodal content blocks into provider-ready text and image payloads.
+Project-URL: Homepage, https://github.com/benjaminjornet/langchain-content-normalizer
+Project-URL: Issues, https://github.com/benjaminjornet/langchain-content-normalizer/issues
+Author-email: Benjamin Jornet <benjamin.jornet@gmail.com>
+License: MIT License
+        Copyright (c) 2026 Benjamin Jornet
+        Permission is hereby granted, free of charge, to any person obtaining a copy
+        of this software and associated documentation files (the "Software"), to deal
+        in the Software without restriction, including without limitation the rights
+        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+        copies of the Software, and to permit persons to whom the Software is
+        furnished to do so, subject to the following conditions:
+        The above copyright notice and this permission notice shall be included in all
+        copies or substantial portions of the Software.
+        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+        SOFTWARE.
+License-File: LICENSE
+Keywords: content-normalization,langchain,llm,mcp,multimodal
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Python: >=3.11
+Description-Content-Type: text/markdown
+# langchain-content-normalizer
+[![CI](https://github.com/BenjaminJornet/langchain-content-normalizer/actions/workflows/ci.yml/badge.svg)](https://github.com/BenjaminJornet/langchain-content-normalizer/actions/workflows/ci.yml)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
+[![Python](https://img.shields.io/badge/python-3.11%2B-blue.svg)](pyproject.toml)
+Normalize the messy content shapes produced by LangChain, MCP tools, Anthropic content blocks, and multimodal chat APIs.
+The package has no runtime dependencies. It works by duck typing instead of importing LangChain or MCP classes.
+## What it solves
+LLM agent stacks often receive content as one of many incompatible shapes:
+| Source | Example shape | Output |
+| --- | --- | --- |
+| Classic chat | `"plain text"` | `"plain text"` |
+| Anthropic blocks | `[{"type": "text", "text": "hi"}]` | `"hi"` |
+| Tool calls | `[{"type": "tool_use", ...}]` | skipped by default |
+| MCP tool results | `[{"type": "tool_result", "content": [...]}]` | flattened text |
+| MCP objects | objects exposing `.text` | extracted text |
+| Message wrappers | objects exposing `.content` | recursively normalized |
+## Install
+```bash
+uv add langchain-content-normalizer
+```
+## Text normalization
+```python
+from lc_content_normalizer import extract_text_content, normalize_tool_output
+content = [
+    {"type": "text", "text": "Reading logs..."},
+    {"type": "tool_use", "name": "tail_logs", "input": {"service": "api"}},
+]
+assert extract_text_content(content) == "Reading logs..."
+assert "tail_logs" in extract_text_content(content, skip_tool_use=False)
+safe_output = normalize_tool_output(huge_tool_payload, max_chars=50_000)
+```
+## Vision format routing
+```python
+from lc_content_normalizer import build_human_message_content, detect_vision_format
+vision_format = detect_vision_format("anthropic", "claude-3-5-sonnet")
+content = build_human_message_content(
+    "Explain this alert screenshot",
+    images=[{"data_url": "data:image/png;base64,...", "mime_type": "image/png"}],
+    vision_format=vision_format,
+)
+```
+`detect_vision_format()` returns:
+| Provider/model | Format |
+| --- | --- |
+| `anthropic` | native Anthropic `image` block with `source.base64` |
+| `ollama` + `llava`/`vision` model name | OpenAI-compatible `image_url` block |
+| `ollama` text-only model | `none`, images are dropped |
+| OpenAI-compatible providers | OpenAI-compatible `image_url` block |
+## Examples
+- `examples/normalize_mcp_output.py` shows how MCP-style tool results are flattened.
+- `examples/build_vision_content.py` shows provider-aware image block generation.
+## Roadmap
+- Add strict mode for unknown content blocks.
+- Add more MCP fixture coverage.
+- Add provider-specific adapters as content formats evolve.
+- Keep runtime dependencies at zero.
+## Development
+```bash
+uv sync --dev
+uv run ruff check .
+uv run pytest
+uv run python scripts/smoke.py
+```
+## License
+MIT

langchain_content_normalizer-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,91 @@
+# langchain-content-normalizer
+[![CI](https://github.com/BenjaminJornet/langchain-content-normalizer/actions/workflows/ci.yml/badge.svg)](https://github.com/BenjaminJornet/langchain-content-normalizer/actions/workflows/ci.yml)
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
+[![Python](https://img.shields.io/badge/python-3.11%2B-blue.svg)](pyproject.toml)
+Normalize the messy content shapes produced by LangChain, MCP tools, Anthropic content blocks, and multimodal chat APIs.
+The package has no runtime dependencies. It works by duck typing instead of importing LangChain or MCP classes.
+## What it solves
+LLM agent stacks often receive content as one of many incompatible shapes:
+| Source | Example shape | Output |
+| --- | --- | --- |
+| Classic chat | `"plain text"` | `"plain text"` |
+| Anthropic blocks | `[{"type": "text", "text": "hi"}]` | `"hi"` |
+| Tool calls | `[{"type": "tool_use", ...}]` | skipped by default |
+| MCP tool results | `[{"type": "tool_result", "content": [...]}]` | flattened text |
+| MCP objects | objects exposing `.text` | extracted text |
+| Message wrappers | objects exposing `.content` | recursively normalized |
+## Install
+```bash
+uv add langchain-content-normalizer
+```
+## Text normalization
+```python
+from lc_content_normalizer import extract_text_content, normalize_tool_output
+content = [
+    {"type": "text", "text": "Reading logs..."},
+    {"type": "tool_use", "name": "tail_logs", "input": {"service": "api"}},
+]
+assert extract_text_content(content) == "Reading logs..."
+assert "tail_logs" in extract_text_content(content, skip_tool_use=False)
+safe_output = normalize_tool_output(huge_tool_payload, max_chars=50_000)
+```
+## Vision format routing
+```python
+from lc_content_normalizer import build_human_message_content, detect_vision_format
+vision_format = detect_vision_format("anthropic", "claude-3-5-sonnet")
+content = build_human_message_content(
+    "Explain this alert screenshot",
+    images=[{"data_url": "data:image/png;base64,...", "mime_type": "image/png"}],
+    vision_format=vision_format,
+)
+```
+`detect_vision_format()` returns:
+| Provider/model | Format |
+| --- | --- |
+| `anthropic` | native Anthropic `image` block with `source.base64` |
+| `ollama` + `llava`/`vision` model name | OpenAI-compatible `image_url` block |
+| `ollama` text-only model | `none`, images are dropped |
+| OpenAI-compatible providers | OpenAI-compatible `image_url` block |
+## Examples
+- `examples/normalize_mcp_output.py` shows how MCP-style tool results are flattened.
+- `examples/build_vision_content.py` shows provider-aware image block generation.
+## Roadmap
+- Add strict mode for unknown content blocks.
+- Add more MCP fixture coverage.
+- Add provider-specific adapters as content formats evolve.
+- Keep runtime dependencies at zero.
+## Development
+```bash
+uv sync --dev
+uv run ruff check .
+uv run pytest
+uv run python scripts/smoke.py
+```
+## License
+MIT

langchain_content_normalizer-0.1.0/docs/release-process.md ADDED Viewed

@@ -0,0 +1,33 @@
+# Release Process
+This project uses semantic versioning while the API is pre-1.0.
+## Checklist
+1. Run `uv run ruff check .`.
+2. Run `uv run pytest`.
+3. Run `uv build`.
+4. Update `CHANGELOG.md`.
+5. Create a Git tag, for example `v0.1.1`.
+6. Push the tag.
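+7. Create a GitHub release. Publishing the release triggers the `Publish to PyPI` workflow automatically.
+8. If the release-triggered run did not publish the package, run the `Publish to PyPI` workflow manually.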
+## Smoke checks
+```bash
+uv run python examples/normalize_mcp_output.py
+uv run python examples/build_vision_content.py
+```
+## Release notes format
+```md
+## vX.Y.Z
+### Added
+### Fixed
+### Changed
+```

langchain_content_normalizer-0.1.0/examples/build_vision_content.py ADDED Viewed

@@ -0,0 +1,10 @@
+from __future__ import annotations
+from lc_content_normalizer import build_human_message_content, detect_vision_format
+image = {"data_url": "data:image/png;base64,abc123", "mime_type": "image/png"}
+for provider, model in [("anthropic", "claude"), ("ollama", "llava"), ("ollama", "llama")]:
+    vision_format = detect_vision_format(provider, model)
+    content = build_human_message_content("Explain this screenshot", [image], vision_format)
+    print(provider, model, vision_format, content)

langchain_content_normalizer-0.1.0/examples/normalize_mcp_output.py ADDED Viewed

@@ -0,0 +1,20 @@
+from __future__ import annotations
+from lc_content_normalizer import extract_text_content, normalize_tool_output
+class FakeTextContent:
+    def __init__(self, text: str) -> None:
+        self.type = "text"
+        self.text = text
+raw_tool_output = [
+    {
+        "type": "tool_result",
+        "content": [FakeTextContent("service=api status=healthy\n")],
+    }
+]
+print(extract_text_content(raw_tool_output))
+print(normalize_tool_output(raw_tool_output, max_chars=80))

langchain_content_normalizer-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,46 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project]
+name = "langchain-content-normalizer"
+version = "0.1.0"
+description = "Normalize LangChain, MCP, and multimodal content blocks into provider-ready text and image payloads."
+readme = "README.md"
+requires-python = ">=3.11"
+# SPDX licence expression (PEP 639). The older `{ file = ... }` table form copies the
+# whole licence text into the `License` metadata field. Needs a PEP 639-aware build
+# backend (hatchling 1.27 or newer).
+license = "MIT"
+license-files = ["LICENSE"]
+authors = [{ name = "Benjamin Jornet", email = "benjamin.jornet@gmail.com" }]
+keywords = ["langchain", "mcp", "llm", "multimodal", "content-normalization"]
+# No `License ::` classifier: it is superseded by the `license` expression above.
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+]
+# Zero runtime dependencies by design.
+dependencies = []
+[project.urls]
+Homepage = "https://github.com/benjaminjornet/langchain-content-normalizer"
+Issues = "https://github.com/benjaminjornet/langchain-content-normalizer/issues"
+[dependency-groups]
+dev = [
+    "pytest>=8.0",
+    "ruff>=0.8",
+]
+[tool.hatch.build.targets.wheel]
+packages = ["src/lc_content_normalizer"]
+[tool.ruff]
+line-length = 100
+target-version = "py311"
+[tool.ruff.lint]
+select = ["E", "F", "I", "UP", "B", "SIM"]
+[tool.pytest.ini_options]
+testpaths = ["tests"]

langchain_content_normalizer-0.1.0/scripts/smoke.py ADDED Viewed

@@ -0,0 +1,23 @@
+from __future__ import annotations
+from lc_content_normalizer import build_human_message_content, extract_text_content
+def main() -> None:
+    """Run a minimal end-to-end check of the text and vision helpers."""
+    # A tool_result wrapping one text block should flatten to that text.
+    tool_result = {"type": "tool_result", "content": [{"type": "text", "text": "ok"}]}
+    flattened = extract_text_content([tool_result])
+    assert flattened == "ok"
+    # A prompt plus one image in "openai" format should give a block list whose
+    # second entry is the image block.
+    png = {"data_url": "data:image/png;base64,abc", "mime_type": "image/png"}
+    blocks = build_human_message_content("describe", [png], "openai")
+    assert isinstance(blocks, list)
+    second_block = blocks[1]
+    assert second_block["type"] == "image_url"
+    print("smoke ok")
+if __name__ == "__main__":
+    main()

langchain_content_normalizer-0.1.0/src/lc_content_normalizer/__init__.py ADDED Viewed

@@ -0,0 +1,20 @@
+from .text import extract_text_content, normalize_tool_output
+from .vision import (
+    VISION_FORMAT_ANTHROPIC_NATIVE,
+    VISION_FORMAT_NONE,
+    VISION_FORMAT_OPENAI,
+    build_human_message_content,
+    detect_vision_format,
+    image_block_for_format,
+)
+__all__ = [
+    "VISION_FORMAT_ANTHROPIC_NATIVE",
+    "VISION_FORMAT_NONE",
+    "VISION_FORMAT_OPENAI",
+    "build_human_message_content",
+    "detect_vision_format",
+    "extract_text_content",
+    "image_block_for_format",
+    "normalize_tool_output",
+]

langchain_content_normalizer-0.1.0/src/lc_content_normalizer/text.py ADDED Viewed

@@ -0,0 +1,74 @@
+from __future__ import annotations
+from typing import Any
+def extract_text_content(content: Any, *, skip_tool_use: bool = True) -> str:
+    """Normalize LangChain, Anthropic, and MCP content shapes to plain text.
+    Supported inputs include strings, Anthropic-style content block lists,
+    MCP TextContent-like objects exposing ``.text``, and message-like objects
+    exposing ``.content``.
+    Args:
+        content: The value to flatten. ``None`` yields ``""``.
+        skip_tool_use: When true (the default), ``tool_use`` blocks contribute
+            no text. When false, each one is rendered as its ``name`` (if it is
+            a non-empty string) followed by ``str()`` of its ``input``.
+    Returns:
+        The concatenated text. Non-empty dict blocks that no branch recognises
+        are kept as ``str(block)`` next to the recognised text. A list that
+        yields no recognised text and contains no recognised block falls back
+        to ``str(content)`` so tool outputs are not silently lost.
+    """
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts: list[str] = []
+        saw_known_block = False
+        # Characters contributed by anything other than the unknown-dict fallback;
+        # used to decide whether the whole-list fallback below applies.
+        recognised_chars = 0
+        for block in content:
+            if isinstance(block, str):
+                piece = block
+                saw_known_block = True
+            elif isinstance(block, dict):
+                block_type = block.get("type")
+                if block_type == "text" and isinstance(block.get("text"), str):
+                    piece = block["text"]
+                    saw_known_block = True
+                elif block_type == "tool_result":
+                    piece = extract_text_content(
+                        block.get("content", ""), skip_tool_use=skip_tool_use
+                    )
+                    saw_known_block = True
+                elif block_type == "tool_use":
+                    saw_known_block = True
+                    if skip_tool_use:
+                        piece = ""
+                    else:
+                        # Include the tool name so the call stays identifiable.
+                        tool_name = block.get("name")
+                        rendered_input = str(block.get("input", ""))
+                        if isinstance(tool_name, str) and tool_name:
+                            piece = f"{tool_name} {rendered_input}" if rendered_input else tool_name
+                        else:
+                            piece = rendered_input
+                # A tuple (not a set) so an unhashable ``type`` value cannot raise.
+                elif block_type in ("image", "image_url"):
+                    # Images carry no text; they are recognised and dropped.
+                    saw_known_block = True
+                    piece = ""
+                else:
+                    # Unknown dict shape: keep it rather than drop it silently.
+                    if block:
+                        parts.append(str(block))
+                    continue
+            else:
+                text_attr = getattr(block, "text", None)
+                if isinstance(text_attr, str):
+                    piece = text_attr
+                    saw_known_block = True
+                else:
+                    piece = str(block)
+            parts.append(piece)
+            recognised_chars += len(piece)
+        if recognised_chars == 0 and content and not saw_known_block:
+            # Nothing recognised at all: preserve the raw list representation.
+            return str(content)
+        return "".join(parts)
+    # Message-like wrapper: unwrap ``.content`` (guarding against self-reference).
+    inner = getattr(content, "content", None)
+    if inner is not None and inner is not content:
+        return extract_text_content(inner, skip_tool_use=skip_tool_use)
+    text_attr = getattr(content, "text", None)
+    if isinstance(text_attr, str):
+        return text_attr
+    return str(content)
+def normalize_tool_output(raw: Any, *, max_chars: int = 50_000) -> str:
+    """Extract a readable tool output string and truncate oversized payloads.
+    Args:
+        raw: Any content shape accepted by ``extract_text_content``.
+        max_chars: Maximum number of characters of extracted text to keep.
+            Negative values are treated as ``0``.
+    Returns:
+        The extracted text unchanged when it fits; otherwise its first
+        ``max_chars`` characters followed by a marker stating how many
+        characters were omitted (the marker is added on top of the limit).
+    """
+    text = extract_text_content(raw)
+    # A negative limit would make the slice below count from the end of the
+    # string and inflate the omitted count, so clamp it to zero.
+    limit = max(max_chars, 0)
+    if len(text) <= limit:
+        return text
+    omitted = len(text) - limit
+    return text[:limit] + f"\n\n[...truncated {omitted} chars]"

langchain_content_normalizer-0.1.0/src/lc_content_normalizer/vision.py ADDED Viewed

@@ -0,0 +1,51 @@
+from __future__ import annotations
+from typing import Any
+VISION_FORMAT_OPENAI = "openai"
+VISION_FORMAT_ANTHROPIC_NATIVE = "anthropic_native"
+VISION_FORMAT_NONE = "none"
+def detect_vision_format(provider: str, model: str) -> str:
+    """Choose the image block format for a provider/model pair.
+    Matching is case-insensitive and empty or ``None`` names are treated as ``""``.
+    ``anthropic`` gets the native Anthropic format. ``ollama`` gets the OpenAI
+    format only when the model name contains ``llava`` or ``vision``, otherwise
+    no vision format. Every other provider gets the OpenAI format.
+    """
+    normalized_provider = provider.lower() if provider else ""
+    normalized_model = model.lower() if model else ""
+    if normalized_provider == "anthropic":
+        return VISION_FORMAT_ANTHROPIC_NATIVE
+    if normalized_provider != "ollama":
+        return VISION_FORMAT_OPENAI
+    # Ollama: only models whose name hints at vision support receive images.
+    has_vision_hint = any(marker in normalized_model for marker in ("llava", "vision"))
+    return VISION_FORMAT_OPENAI if has_vision_hint else VISION_FORMAT_NONE
+def image_block_for_format(image: dict[str, str], vision_format: str) -> dict[str, Any]:
+    """Convert one image mapping into the block shape for ``vision_format``.
+    For the Anthropic native format the ``data_url`` is split at the first comma
+    into header and base64 payload; ``mime_type`` is used as the media type when
+    truthy, otherwise it is taken from the header. Any other format yields an
+    ``image_url`` block and requires the ``data_url`` key to be present.
+    """
+    if vision_format != VISION_FORMAT_ANTHROPIC_NATIVE:
+        return {"type": "image_url", "image_url": {"url": image["data_url"]}}
+    prefix, _, payload = image.get("data_url", "").partition(",")
+    declared_type = image.get("mime_type")
+    if not declared_type:
+        # Fall back to the media type embedded in the data URL header.
+        declared_type = prefix.removeprefix("data:").split(";", 1)[0]
+    source = {"type": "base64", "media_type": declared_type, "data": payload}
+    return {"type": "image", "source": source}
+def build_human_message_content(
+    text: str,
+    images: list[dict[str, str]] | None = None,
+    vision_format: str = VISION_FORMAT_OPENAI,
+) -> str | list[dict[str, Any]]:
+    """Assemble human message content as plain text or a multimodal block list.
+    Returns ``text`` unchanged when there are no images or the format is
+    ``VISION_FORMAT_NONE``. Otherwise returns a list holding an optional text
+    block (omitted when ``text`` is empty) followed by one block per image.
+    """
+    wants_images = bool(images) and vision_format != VISION_FORMAT_NONE
+    if not wants_images:
+        return text
+    text_blocks: list[dict[str, Any]] = [{"type": "text", "text": text}] if text else []
+    image_blocks = [image_block_for_format(entry, vision_format) for entry in images]
+    return text_blocks + image_blocks

langchain_content_normalizer-0.1.0/tests/test_text.py ADDED Viewed

@@ -0,0 +1,83 @@
+from __future__ import annotations
+from lc_content_normalizer import extract_text_content, normalize_tool_output
+class FakeMessage:
+    def __init__(self, content):
+        self.content = content
+class FakeTextContent:
+    def __init__(self, text: str):
+        self.type = "text"
+        self.text = text
+def test_string_passthrough():
+    assert extract_text_content("hello") == "hello"
+def test_none_returns_empty():
+    assert extract_text_content(None) == ""
+def test_anthropic_text_blocks_are_concatenated_and_tool_use_is_skipped():
+    content = [
+        {"type": "text", "text": "The file "},
+        {"type": "tool_use", "name": "read_file", "input": {"path": "app.py"}},
+        {"type": "text", "text": "is ready."},
+    ]
+    assert extract_text_content(content) == "The file is ready."
+def test_tool_use_can_be_included_explicitly():
+    content = [{"type": "tool_use", "input": {"path": "app.py"}}]
+    assert "app.py" in extract_text_content(content, skip_tool_use=False)
+def test_tool_result_nested_content_is_flattened():
+    content = [{"type": "tool_result", "content": [{"type": "text", "text": "inner"}]}]
+    assert extract_text_content(content) == "inner"
+def test_mcp_text_content_object_is_extracted():
+    assert extract_text_content(FakeTextContent("from MCP")) == "from MCP"
+def test_mcp_text_content_list_is_extracted():
+    assert extract_text_content([FakeTextContent("from MCP")]) == "from MCP"
+def test_message_like_content_is_unwrapped():
+    assert extract_text_content(FakeMessage([{"type": "text", "text": "wrapped"}])) == "wrapped"
+def test_unknown_dict_is_preserved_as_string():
+    result = extract_text_content({"status": "ok", "count": 2})
+    assert "status" in result
+    assert "2" in result
+def test_unknown_block_list_does_not_silently_disappear():
+    result = extract_text_content([{"request_id": "abc", "message": "hello"}])
+    assert "request_id" in result
+    assert "hello" in result
+def test_image_blocks_are_dropped():
+    content = [{"type": "text", "text": "screenshot:"}, {"type": "image", "source": {}}]
+    assert extract_text_content(content) == "screenshot:"
+def test_normalize_tool_output_truncates_large_payloads():
+    result = normalize_tool_output("x" * 20, max_chars=10)
+    assert result.startswith("x" * 10)
+    assert "truncated 10 chars" in result

langchain_content_normalizer-0.1.0/tests/test_vision.py ADDED Viewed

@@ -0,0 +1,60 @@
+from __future__ import annotations
+from lc_content_normalizer import (
+    VISION_FORMAT_ANTHROPIC_NATIVE,
+    VISION_FORMAT_NONE,
+    VISION_FORMAT_OPENAI,
+    build_human_message_content,
+    detect_vision_format,
+    image_block_for_format,
+)
+PNG = {"data_url": "data:image/png;base64,abc123", "mime_type": "image/png"}
+def test_detect_vision_format_anthropic_native():
+    assert detect_vision_format("anthropic", "claude-3-5-sonnet") == VISION_FORMAT_ANTHROPIC_NATIVE
+def test_detect_vision_format_ollama_vision_models_use_openai_blocks():
+    assert detect_vision_format("ollama", "llava:13b") == VISION_FORMAT_OPENAI
+    assert detect_vision_format("ollama", "model-with-vision") == VISION_FORMAT_OPENAI
+def test_detect_vision_format_ollama_text_only():
+    assert detect_vision_format("ollama", "llama3") == VISION_FORMAT_NONE
+def test_detect_vision_format_openai_compatible_default():
+    assert detect_vision_format("openrouter", "gpt-4o-mini") == VISION_FORMAT_OPENAI
+def test_image_block_for_openai_format():
+    assert image_block_for_format(PNG, VISION_FORMAT_OPENAI) == {
+        "type": "image_url",
+        "image_url": {"url": "data:image/png;base64,abc123"},
+    }
+def test_image_block_for_anthropic_native_format():
+    block = image_block_for_format(PNG, VISION_FORMAT_ANTHROPIC_NATIVE)
+    assert block["type"] == "image"
+    assert block["source"] == {"type": "base64", "media_type": "image/png", "data": "abc123"}
+def test_build_human_message_content_returns_text_without_images():
+    assert build_human_message_content("hello", []) == "hello"
+def test_build_human_message_content_drops_images_when_no_vision():
+    assert build_human_message_content("hello", [PNG], VISION_FORMAT_NONE) == "hello"
+def test_build_human_message_content_returns_multimodal_blocks():
+    content = build_human_message_content("hello", [PNG], VISION_FORMAT_OPENAI)
+    assert content == [
+        {"type": "text", "text": "hello"},
+        {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc123"}},
+    ]

langchain_content_normalizer-0.1.0/uv.lock ADDED Viewed

@@ -0,0 +1,108 @@
+version = 1
+revision = 3
+requires-python = ">=3.11"
+[[package]]
+name = "colorama"
+version = "0.4.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
+]
+[[package]]
+name = "iniconfig"
+version = "2.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
+]
+[[package]]
+name = "langchain-content-normalizer"
+version = "0.1.0"
+source = { editable = "." }
+[package.dev-dependencies]
+dev = [
+    { name = "pytest" },
+    { name = "ruff" },
+]
+[package.metadata]
+[package.metadata.requires-dev]
+dev = [
+    { name = "pytest", specifier = ">=8.0" },
+    { name = "ruff", specifier = ">=0.8" },
+]
+[[package]]
+name = "packaging"
+version = "26.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d7/f1/e7a6dd94a8d4a5626c03e4e99c87f241ba9e350cd9e6d75123f992427270/packaging-26.2.tar.gz", hash = "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661", size = 228134, upload-time = "2026-04-24T20:15:23.917Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/df/b2/87e62e8c3e2f4b32e5fe99e0b86d576da1312593b39f47d8ceef365e95ed/packaging-26.2-py3-none-any.whl", hash = "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e", size = 100195, upload-time = "2026-04-24T20:15:22.081Z" },
+]
+[[package]]
+name = "pluggy"
+version = "1.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
+]
+[[package]]
+name = "pygments"
+version = "2.20.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" },
+]
+[[package]]
+name = "pytest"
+version = "9.0.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+    { name = "iniconfig" },
+    { name = "packaging" },
+    { name = "pluggy" },
+    { name = "pygments" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" },
+]
+[[package]]
+name = "ruff"
+version = "0.15.15"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/84/6f/a76f7d96e5c962f5b69cee865e49c15c1116897c01990faa8a57edb62e7f/ruff-0.15.15.tar.gz", hash = "sha256:b8dff018130b46d8e5bf0f926ef6b60cf871d6d5ae45fc9334e09632daa741d6", size = 4706985, upload-time = "2026-05-28T14:16:57.784Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fa/9d/3a45c05b8ab04b4705989de70a79008e27c8003296a0feaee9edc18dd7e9/ruff-0.15.15-py3-none-linux_armv6l.whl", hash = "sha256:cf93e5388f412e1b108b1f8b34a6e036b70fe8aff89393befad96fe48670311b", size = 10710652, upload-time = "2026-05-28T14:16:06.701Z" },
+    { url = "https://files.pythonhosted.org/packages/05/66/da974431624bf3b49f6ee1f9543c02d929ff1cba78b0d5a79c38cf21f744/ruff-0.15.15-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ac5a646d1f6a7dadd5d50842dae2c1f9862ac887ef5d1b1375e02def791fde6e", size = 11096615, upload-time = "2026-05-28T14:16:23.313Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/09/7443452e5d290230a712103f2fdceeef7184f3ec99a2bd01c8be78aaceb5/ruff-0.15.15-py3-none-macosx_11_0_arm64.whl", hash = "sha256:77d955a431430c66f72dd94e379ad38a16daea3d25094872ac4edf9e797be530", size = 10436683, upload-time = "2026-05-28T14:16:40.974Z" },
+    { url = "https://files.pythonhosted.org/packages/53/01/d330c26a57fa4f3943a14424904027428315b700fe4d14a84bb123a649e5/ruff-0.15.15-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7614ee79c69788cf6cedd568069ade9cecc22a1ad20494efe8d0c9ebb4b622d4", size = 10769064, upload-time = "2026-05-28T14:16:28.905Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/85/cc8770f8bdff541b1da8392d1634141fe4a0e3f4ee596605959b7906c27f/ruff-0.15.15-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3cdb1679e06a1f6b47bc384714ae96f6e2fb65ca441eb78c43d2ca554176ce1f", size = 10511987, upload-time = "2026-05-28T14:16:43.732Z" },
+    { url = "https://files.pythonhosted.org/packages/7c/29/8c190c1472b63013583ba391f3342036e02010544c1270455ed8e519bdf3/ruff-0.15.15-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2728b93d7b23a603ea2c0ac6eb73d760bd38ec9de35f35fb41e18f7a3fee7622", size = 11275100, upload-time = "2026-05-28T14:16:55.244Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/6b/7e145ce2cc8e63d6834eca03d83a0e18d121def5c69f91b4cf4011ed4879/ruff-0.15.15-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be582fcc0db438902c7792b08d6ddf6c9b9e21addaa10092c2c741cfb09e5a45", size = 12176903, upload-time = "2026-05-28T14:16:14.368Z" },
+    { url = "https://files.pythonhosted.org/packages/80/a3/d5974637f68e451f7fadf015cf3101d1cd7d8ba5027cffe0b9e3826ebe6b/ruff-0.15.15-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7aa77465b8ecaf1a27bea098d696f7fed5e1eccbd10b321b682d6de586ae5627", size = 11404550, upload-time = "2026-05-28T14:16:20.138Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/1c/e6e5e568f22be4fb05d6244234aba384c06b451252453b821e1a529263cf/ruff-0.15.15-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48decfa11d740de4889de623be1463308346312f2409a56e24aa280c86162dc4", size = 11382027, upload-time = "2026-05-28T14:16:46.615Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/01/170921b49fcd2e8858825593f91cf7146c3e40a5c3e6df763e4bb0484dde/ruff-0.15.15-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a5015088452ca0081387063649ec67f06d3d1d6b8b936a1f836b5e9657ecd48c", size = 11366041, upload-time = "2026-05-28T14:16:26.247Z" },
+    { url = "https://files.pythonhosted.org/packages/87/54/a7bad711d7de93254e15e06a4c375b89a03d18de45d3e5dcc86a4472fb1a/ruff-0.15.15-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:f5294aab6356c81600fcdea3a62bb1b924dfd5e91767c12318d3f68f86af57cd", size = 10741795, upload-time = "2026-05-28T14:16:17.11Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/31/38c075963668f8b41c6914ee0f6f318727fbe30ab9145cb29e6df464c5fa/ruff-0.15.15-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:db5bd4d802415cca656dc1616070b725952d6ae95eb5d4831e49fbd94a38f75f", size = 10511117, upload-time = "2026-05-28T14:16:31.767Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/96/6ff689e1f7e375d1d97075eca022f74c2bab59554a432fe4d2e6f091986a/ruff-0.15.15-py3-none-musllinux_1_2_i686.whl", hash = "sha256:587a6278ed42059191c1a466e490bd7930fb50bd2e255398bc29616c895a61cb", size = 10994867, upload-time = "2026-05-28T14:16:35.149Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/c2/5dce0ab9f92a8d534fa62b9bf9caca3eddb8c1a81b616f5e195ada4f0d6e/ruff-0.15.15-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:df0c1c084f5f4be9812f61518a45c440d3c30d69ce4bf6c5270e66d38338f02a", size = 11482101, upload-time = "2026-05-28T14:16:49.598Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/c0/1003b60edd697c649faf61f1a34094b1abb38fb3d1181e3f895781250a08/ruff-0.15.15-py3-none-win32.whl", hash = "sha256:29428ea79694afbe756d45fd59b36f22b6b020dc0443cf7de0173046236964b9", size = 10716774, upload-time = "2026-05-28T14:16:52.337Z" },
+    { url = "https://files.pythonhosted.org/packages/02/a8/1269eddd6945a06c23f055ef7848886e37cf9d6a8bebb386a3115f01470c/ruff-0.15.15-py3-none-win_amd64.whl", hash = "sha256:8df0323902e15e24bc4bf246da830573d3cf3352bd0b9a164eab335d111ff4a4", size = 11868463, upload-time = "2026-05-28T14:16:11.333Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/b2/920464c907b191e37469d477a1aa8bc048b8f36c4c1610dfa4ab87b39e18/ruff-0.15.15-py3-none-win_arm64.whl", hash = "sha256:3c8ceca6792f38196b8f589bc92eccd03eef286602da92e5dc05cc42ef6441b7", size = 11138498, upload-time = "2026-05-28T14:16:38.425Z" },
+]