PyPI - langchain - Versions diffs - 1.0.0a12__py3-none-any.whl → 1.0.4__py3-none-any.whl - Mend

langchain 1.0.0a12__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

langchain/__init__.py +1 -1
langchain/agents/__init__.py +7 -1
langchain/agents/factory.py +722 -226
langchain/agents/middleware/__init__.py +36 -9
langchain/agents/middleware/_execution.py +388 -0
langchain/agents/middleware/_redaction.py +350 -0
langchain/agents/middleware/context_editing.py +46 -17
langchain/agents/middleware/file_search.py +382 -0
langchain/agents/middleware/human_in_the_loop.py +220 -173
langchain/agents/middleware/model_call_limit.py +43 -10
langchain/agents/middleware/model_fallback.py +79 -36
langchain/agents/middleware/pii.py +68 -504
langchain/agents/middleware/shell_tool.py +718 -0
langchain/agents/middleware/summarization.py +2 -2
langchain/agents/middleware/{planning.py → todo.py} +35 -16
langchain/agents/middleware/tool_call_limit.py +308 -114
langchain/agents/middleware/tool_emulator.py +200 -0
langchain/agents/middleware/tool_retry.py +384 -0
langchain/agents/middleware/tool_selection.py +25 -21
langchain/agents/middleware/types.py +714 -257
langchain/agents/structured_output.py +37 -27
langchain/chat_models/__init__.py +7 -1
langchain/chat_models/base.py +192 -190
langchain/embeddings/__init__.py +13 -3
langchain/embeddings/base.py +49 -29
langchain/messages/__init__.py +50 -1
langchain/tools/__init__.py +9 -7
langchain/tools/tool_node.py +16 -1174
langchain-1.0.4.dist-info/METADATA +92 -0
langchain-1.0.4.dist-info/RECORD +34 -0
langchain/_internal/__init__.py +0 -0
langchain/_internal/_documents.py +0 -35
langchain/_internal/_lazy_import.py +0 -35
langchain/_internal/_prompts.py +0 -158
langchain/_internal/_typing.py +0 -70
langchain/_internal/_utils.py +0 -7
langchain/agents/_internal/__init__.py +0 -1
langchain/agents/_internal/_typing.py +0 -13
langchain/agents/middleware/prompt_caching.py +0 -86
langchain/documents/__init__.py +0 -7
langchain/embeddings/cache.py +0 -361
langchain/storage/__init__.py +0 -22
langchain/storage/encoder_backed.py +0 -123
langchain/storage/exceptions.py +0 -5
langchain/storage/in_memory.py +0 -13
langchain-1.0.0a12.dist-info/METADATA +0 -122
langchain-1.0.0a12.dist-info/RECORD +0 -43
{langchain-1.0.0a12.dist-info → langchain-1.0.4.dist-info}/WHEEL +0 -0
{langchain-1.0.0a12.dist-info → langchain-1.0.4.dist-info}/licenses/LICENSE +0 -0

langchain/agents/middleware/_redaction.py ADDED Viewed

@@ -0,0 +1,350 @@
+"""Shared redaction utilities for middleware components."""
+from __future__ import annotations
+import hashlib
+import ipaddress
+import re
+from collections.abc import Callable, Sequence
+from dataclasses import dataclass
+from typing import Literal
+from urllib.parse import urlparse
+from typing_extensions import TypedDict
+# Closed set of strategy names; `apply_strategy` compares against these exact strings.
+RedactionStrategy = Literal["block", "redact", "mask", "hash"]
+"""Supported strategies for handling detected sensitive values."""
+class PIIMatch(TypedDict):
+    """Represents an individual match of sensitive data."""
+    # Name of the sensitive-data category, e.g. "email" or "ip".
+    type: str
+    # The matched substring exactly as it appeared in the scanned text.
+    value: str
+    # Offset where the match begins (the built-in detectors store `match.start()`).
+    start: int
+    # Offset just past the match (the built-in detectors store `match.end()`).
+    end: int
+class PIIDetectionError(Exception):
+    """Signal that sensitive values were found while blocking is configured."""
+    def __init__(self, pii_type: str, matches: Sequence[PIIMatch]) -> None:
+        """Build the error and keep the offending matches for inspection.
+        Args:
+            pii_type: Name of the detected sensitive type.
+            matches: Every match found for that type; stored as a list copy.
+        """
+        super().__init__(
+            f"Detected {len(matches)} instance(s) of {pii_type} in text content"
+        )
+        self.pii_type = pii_type
+        self.matches = list(matches)
+# A detector takes the text to scan and returns the matches it found (possibly none).
+Detector = Callable[[str], list[PIIMatch]]
+"""Callable signature for detectors that locate sensitive values."""
+def detect_email(content: str) -> list[PIIMatch]:
+    """Detect email addresses in content.
+    Args:
+        content: Text to scan.
+    Returns:
+        One `PIIMatch` of type `"email"` per address found, in order of appearance.
+    """
+    # The top-level-domain class is letters only. A `|` inside a character
+    # class is a literal pipe (not alternation), so `[A-Z|a-z]` would wrongly
+    # accept text such as "user@example.c|m" or swallow a trailing "|next".
+    pattern = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"
+    return [
+        PIIMatch(
+            type="email",
+            value=match.group(),
+            start=match.start(),
+            end=match.end(),
+        )
+        for match in re.finditer(pattern, content)
+    ]
+def detect_credit_card(content: str) -> list[PIIMatch]:
+    """Find card numbers in content, keeping only those that pass the Luhn check.
+    Candidates are four groups of four digits, each pair of groups optionally
+    separated by one whitespace character or hyphen.
+    """
+    grouped_digits = re.compile(r"\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b")
+    return [
+        PIIMatch(
+            type="credit_card",
+            value=hit.group(),
+            start=hit.start(),
+            end=hit.end(),
+        )
+        for hit in grouped_digits.finditer(content)
+        if _passes_luhn(hit.group())
+    ]
+def detect_ip(content: str) -> list[PIIMatch]:
+    """Detect IP addresses in content.
+    Only dotted-quad (IPv4-shaped) candidates are scanned for, and a candidate
+    is kept only when `ipaddress.ip_address` accepts it. The pattern used here
+    does not match IPv6 text.
+    """
+    dotted_quad = re.compile(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b")
+    found: list[PIIMatch] = []
+    for hit in dotted_quad.finditer(content):
+        candidate = hit.group()
+        try:
+            ipaddress.ip_address(candidate)
+        except ValueError:
+            # Shaped like an address but rejected by the stdlib parser.
+            pass
+        else:
+            found.append(
+                PIIMatch(
+                    type="ip",
+                    value=candidate,
+                    start=hit.start(),
+                    end=hit.end(),
+                )
+            )
+    return found
+def detect_mac_address(content: str) -> list[PIIMatch]:
+    """Find MAC addresses: six hex pairs separated by colons or hyphens."""
+    hex_pairs = re.compile(r"\b([0-9A-Fa-f]{2}[:-]){5}[0-9A-Fa-f]{2}\b")
+    found: list[PIIMatch] = []
+    for hit in hex_pairs.finditer(content):
+        found.append(
+            PIIMatch(
+                type="mac_address",
+                value=hit.group(),
+                start=hit.start(),
+                end=hit.end(),
+            )
+        )
+    return found
+def detect_url(content: str) -> list[PIIMatch]:
+    """Detect URLs in content using regex candidates checked with `urlparse`.
+    Two passes are made: first for `http://` / `https://` URLs, then for
+    scheme-less hosts, which are accepted only when they start with `www.`
+    or contain a `/`.
+    """
+    found: list[PIIMatch] = []
+    def overlaps_accepted(begin: int, finish: int) -> bool:
+        # True when either end of the span falls inside an already accepted match.
+        return any(
+            prior["start"] <= begin < prior["end"] or prior["start"] < finish <= prior["end"]
+            for prior in found
+        )
+    # Pass 1: URLs with scheme (http:// or https://)
+    scheme_pattern = r"https?://[^\s<>\"{}|\\^`\[\]]+"
+    for hit in re.finditer(scheme_pattern, content):
+        text = hit.group()
+        parsed = urlparse(text)
+        if parsed.scheme not in ("http", "https") or not parsed.netloc:
+            continue
+        found.append(
+            PIIMatch(
+                type="url",
+                value=text,
+                start=hit.start(),
+                end=hit.end(),
+            )
+        )
+    # Pass 2: URLs without scheme (www.example.com or example.com/path).
+    # Kept conservative so that a bare "example.com" in prose is not reported.
+    bare_pattern = (
+        r"\b(?:www\.)?[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?"
+        r"(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?:/[^\s]*)?"
+    )
+    for hit in re.finditer(bare_pattern, content):
+        begin, finish = hit.start(), hit.end()
+        if overlaps_accepted(begin, finish):
+            continue
+        text = hit.group()
+        if "/" not in text and not text.startswith("www."):
+            continue
+        # A scheme is prepended so that urlparse fills in `netloc`.
+        parsed = urlparse(f"http://{text}")
+        if parsed.netloc and "." in parsed.netloc:
+            found.append(
+                PIIMatch(
+                    type="url",
+                    value=text,
+                    start=begin,
+                    end=finish,
+                )
+            )
+    return found
+# `resolve_detector` looks a PII type up here when no custom detector is given.
+BUILTIN_DETECTORS: dict[str, Detector] = {
+    "email": detect_email,
+    "credit_card": detect_credit_card,
+    "ip": detect_ip,
+    "mac_address": detect_mac_address,
+    "url": detect_url,
+}
+"""Registry of built-in detectors keyed by type name."""
+def _passes_luhn(card_number: str) -> bool:
+    """Return True when the digits of `card_number` satisfy the Luhn checksum.
+    Non-digit characters are ignored. Inputs with fewer than 13 or more than
+    19 digits are rejected outright.
+    """
+    digits = [int(ch) for ch in card_number if ch.isdigit()]
+    if len(digits) < 13 or len(digits) > 19:
+        return False
+    total = 0
+    # Walk from the rightmost digit; every second digit is doubled, and a
+    # two-digit product is folded back to one digit by subtracting 9.
+    for position, digit in enumerate(digits[::-1]):
+        if position % 2 == 0:
+            total += digit
+            continue
+        doubled = digit * 2
+        total += doubled - 9 if doubled > 9 else doubled
+    return total % 10 == 0
+def _apply_redact_strategy(content: str, matches: list[PIIMatch]) -> str:
+    """Replace every match with a `[REDACTED_<TYPE>]` placeholder."""
+    # Substitute from the rightmost match leftwards so that the offsets of
+    # matches still to be processed are not shifted by earlier replacements.
+    right_to_left = sorted(matches, key=lambda item: item["start"], reverse=True)
+    redacted = content
+    for hit in right_to_left:
+        placeholder = f"[REDACTED_{hit['type'].upper()}]"
+        head = redacted[: hit["start"]]
+        tail = redacted[hit["end"] :]
+        redacted = head + placeholder + tail
+    return redacted
+def _apply_mask_strategy(content: str, matches: list[PIIMatch]) -> str:
+    """Replace every match with a partially hidden form chosen by its type."""
+    def masked_form(kind: str, raw: str) -> str:
+        # Work out the masked replacement text for a single matched value.
+        if kind == "email":
+            pieces = raw.split("@")
+            if len(pieces) != 2:
+                return "****"
+            labels = pieces[1].split(".")
+            if len(labels) >= 2:
+                # Keep the local part and the last domain label only.
+                return f"{pieces[0]}@****.{labels[-1]}"
+            return f"{pieces[0]}@****"
+        if kind == "credit_card":
+            digits_only = "".join(ch for ch in raw if ch.isdigit())
+            # Reuse the separator that the original number was written with.
+            if "-" in raw:
+                separator = "-"
+            elif " " in raw:
+                separator = " "
+            else:
+                return f"************{digits_only[-4:]}"
+            return f"****{separator}****{separator}****{separator}{digits_only[-4:]}"
+        if kind == "ip":
+            octets = raw.split(".")
+            if len(octets) == 4:
+                return f"*.*.*.{octets[-1]}"
+            return "****"
+        if kind == "mac_address":
+            separator = ":" if ":" in raw else "-"
+            return (
+                f"**{separator}**{separator}**{separator}**{separator}**{separator}{raw[-2:]}"
+            )
+        if kind == "url":
+            return "[MASKED_URL]"
+        # Any other type: show at most the last four characters.
+        if len(raw) > 4:
+            return f"****{raw[-4:]}"
+        return "****"
+    masked_text = content
+    # Rightmost match first, so earlier offsets stay valid while substituting.
+    for hit in sorted(matches, key=lambda item: item["start"], reverse=True):
+        replacement = masked_form(hit["type"], hit["value"])
+        masked_text = masked_text[: hit["start"]] + replacement + masked_text[hit["end"] :]
+    return masked_text
+def _apply_hash_strategy(content: str, matches: list[PIIMatch]) -> str:
+    """Swap every match for a tag carrying a short SHA-256 fingerprint of its value.
+    The tag has the form `<type_hash:xxxxxxxx>`, where the eight characters are
+    the start of the hex digest of the matched value.
+    """
+    hashed_text = content
+    right_to_left = sorted(matches, key=lambda item: item["start"], reverse=True)
+    for hit in right_to_left:
+        fingerprint = hashlib.sha256(hit["value"].encode()).hexdigest()
+        tag = f"<{hit['type']}_hash:{fingerprint[:8]}>"
+        hashed_text = hashed_text[: hit["start"]] + tag + hashed_text[hit["end"] :]
+    return hashed_text
+def apply_strategy(
+    content: str,
+    matches: list[PIIMatch],
+    strategy: RedactionStrategy,
+) -> str:
+    """Rewrite `content` according to `strategy` for the given matches.
+    Args:
+        content: Text in which the matches were found.
+        matches: Matches to act on; when empty, `content` is returned as is.
+        strategy: One of "redact", "mask", "hash" or "block".
+    Returns:
+        The rewritten text.
+    Raises:
+        PIIDetectionError: If `strategy` is "block" and there are matches.
+        ValueError: If `strategy` is not a known name and there are matches.
+    """
+    if not matches:
+        return content
+    if strategy == "block":
+        raise PIIDetectionError(matches[0]["type"], matches)
+    if strategy == "redact":
+        rewrite = _apply_redact_strategy
+    elif strategy == "mask":
+        rewrite = _apply_mask_strategy
+    elif strategy == "hash":
+        rewrite = _apply_hash_strategy
+    else:
+        msg = f"Unknown redaction strategy: {strategy}"
+        raise ValueError(msg)
+    return rewrite(content, matches)
+def resolve_detector(pii_type: str, detector: Detector | str | None) -> Detector:
+    """Turn a detector configuration into a callable detector.
+    Args:
+        pii_type: Type name; selects the built-in detector when `detector` is
+            None, and labels the matches produced by a regex detector.
+        detector: None for the built-in detector, a regex pattern string, or
+            a ready-made callable (returned unchanged).
+    Returns:
+        A callable that maps text to a list of matches.
+    Raises:
+        ValueError: If `detector` is None and `pii_type` has no built-in detector.
+    """
+    if detector is None:
+        if pii_type in BUILTIN_DETECTORS:
+            return BUILTIN_DETECTORS[pii_type]
+        msg = (
+            f"Unknown PII type: {pii_type}. "
+            f"Must be one of {list(BUILTIN_DETECTORS.keys())} or provide a custom detector."
+        )
+        raise ValueError(msg)
+    if not isinstance(detector, str):
+        return detector
+    # A string is treated as a regex; compile it once and close over it.
+    compiled = re.compile(detector)
+    def regex_detector(content: str) -> list[PIIMatch]:
+        hits: list[PIIMatch] = []
+        for hit in compiled.finditer(content):
+            hits.append(
+                PIIMatch(
+                    type=pii_type,
+                    value=hit.group(),
+                    start=hit.start(),
+                    end=hit.end(),
+                )
+            )
+        return hits
+    return regex_detector
+@dataclass(frozen=True)
+class RedactionRule:
+    """User-facing configuration describing how one PII type is handled."""
+    # Name of the sensitive-data type this rule covers.
+    pii_type: str
+    # What to do with matches; defaults to replacing them with a placeholder.
+    strategy: RedactionStrategy = "redact"
+    # None (use the built-in detector), a regex pattern string, or a callable.
+    detector: Detector | str | None = None
+    def resolve(self) -> ResolvedRedactionRule:
+        """Return the executable form of this rule with its detector resolved."""
+        return ResolvedRedactionRule(
+            pii_type=self.pii_type,
+            strategy=self.strategy,
+            detector=resolve_detector(self.pii_type, self.detector),
+        )
+@dataclass(frozen=True)
+class ResolvedRedactionRule:
+    """A redaction rule whose detector is already a callable, ready to run."""
+    # Name of the sensitive-data type this rule covers.
+    pii_type: str
+    # What to do with the matches the detector reports.
+    strategy: RedactionStrategy
+    # Callable that maps text to the list of matches found in it.
+    detector: Detector
+    def apply(self, content: str) -> tuple[str, list[PIIMatch]]:
+        """Run the detector on `content` and apply the strategy to what it finds.
+        Returns:
+            The (possibly rewritten) text together with the matches; when
+            nothing is detected, the original text and an empty list.
+        """
+        found = self.detector(content)
+        if found:
+            return apply_strategy(content, found, self.strategy), found
+        return content, []
+# Names exported by `from ... import *`.
+# NOTE(review): `resolve_detector`, `BUILTIN_DETECTORS`, `Detector` and
+# `RedactionStrategy` are public-looking but not listed here - confirm whether
+# that is intentional.
+__all__ = [
+    "PIIDetectionError",
+    "PIIMatch",
+    "RedactionRule",
+    "ResolvedRedactionRule",
+    "apply_strategy",
+    "detect_credit_card",
+    "detect_email",
+    "detect_ip",
+    "detect_mac_address",
+    "detect_url",
+]

langchain/agents/middleware/context_editing.py CHANGED Viewed

@@ -8,9 +8,9 @@ with any LangChain chat model.
 from __future__ import annotations
-from collections.abc import Callable, Iterable, Sequence
+from collections.abc import Awaitable, Callable, Iterable, Sequence
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Literal
+from typing import Literal
 from langchain_core.messages import (
     AIMessage,
@@ -22,10 +22,12 @@ from langchain_core.messages import (
 from langchain_core.messages.utils import count_tokens_approximately
 from typing_extensions import Protocol
-from langchain.agents.middleware.types import AgentMiddleware, AgentState, ModelRequest
-if TYPE_CHECKING:
-    from langgraph.runtime import Runtime
+from langchain.agents.middleware.types import (
+    AgentMiddleware,
+    ModelCallResult,
+    ModelRequest,
+    ModelResponse,
+)
 DEFAULT_TOOL_PLACEHOLDER = "[cleared]"
@@ -180,11 +182,11 @@ class ClearToolUsesEdit(ContextEdit):
 class ContextEditingMiddleware(AgentMiddleware):
-    """Middleware that automatically prunes tool results to manage context size.
+    """Automatically prunes tool results to manage context size.
     The middleware applies a sequence of edits when the total input token count
-    exceeds configured thresholds. Currently the ``ClearToolUsesEdit`` strategy is
-    supported, aligning with Anthropic's ``clear_tool_uses_20250919`` behaviour.
+    exceeds configured thresholds. Currently the `ClearToolUsesEdit` strategy is
+    supported, aligning with Anthropic's `clear_tool_uses_20250919` behaviour.
     """
     edits: list[ContextEdit]
@@ -196,7 +198,7 @@ class ContextEditingMiddleware(AgentMiddleware):
         edits: Iterable[ContextEdit] | None = None,
         token_count_method: Literal["approximate", "model"] = "approximate",  # noqa: S107
     ) -> None:
-        """Initialise a context editing middleware instance.
+        """Initializes a context editing middleware instance.
         Args:
             edits: Sequence of edit strategies to apply. Defaults to a single
@@ -209,15 +211,42 @@ class ContextEditingMiddleware(AgentMiddleware):
         self.edits = list(edits or (ClearToolUsesEdit(),))
         self.token_count_method = token_count_method
-    def modify_model_request(
+    def wrap_model_call(
+        self,
+        request: ModelRequest,
+        handler: Callable[[ModelRequest], ModelResponse],
+    ) -> ModelCallResult:
+        """Apply context edits before invoking the model via handler.
+        Args:
+            request: Model request whose `messages` are handed to each edit.
+            handler: Callable that performs the model call for the request.
+        Returns:
+            Whatever `handler(request)` returns.
+        """
+        # Nothing to edit when there are no messages; call straight through.
+        if not request.messages:
+            return handler(request)
+        # Pick the token counter once so that every edit uses the same one.
+        if self.token_count_method == "approximate":  # noqa: S105
+            def count_tokens(messages: Sequence[BaseMessage]) -> int:
+                return count_tokens_approximately(messages)
+        else:
+            # Model-based counting also includes the system prompt (when set) and the tools.
+            system_msg = (
+                [SystemMessage(content=request.system_prompt)] if request.system_prompt else []
+            )
+            def count_tokens(messages: Sequence[BaseMessage]) -> int:
+                return request.model.get_num_tokens_from_messages(
+                    system_msg + list(messages), request.tools
+                )
+        # Each edit is given `request.messages` itself; no copy is made here.
+        for edit in self.edits:
+            edit.apply(request.messages, count_tokens=count_tokens)
+        return handler(request)
+    async def awrap_model_call(
         self,
         request: ModelRequest,
-        state: AgentState,  # noqa: ARG002
-        runtime: Runtime,  # noqa: ARG002
-    ) -> ModelRequest:
-        """Modify the model request by applying context edits before invocation."""
+        handler: Callable[[ModelRequest], Awaitable[ModelResponse]],
+    ) -> ModelCallResult:
+        """Apply context edits before invoking the model via handler (async version)."""
         if not request.messages:
-            return request
+            return await handler(request)
         if self.token_count_method == "approximate":  # noqa: S105
@@ -236,7 +265,7 @@ class ContextEditingMiddleware(AgentMiddleware):
         for edit in self.edits:
             edit.apply(request.messages, count_tokens=count_tokens)
-        return request
+        return await handler(request)
 __all__ = [

langchain 1.0.0a12__py3-none-any.whl → 1.0.4__py3-none-any.whl

langchain 1.0.0a12__py3-none-any.whl → 1.0.4__py3-none-any.whl