PyPI - ai-lib-python - Versions diffs - 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl - Mend

ai-lib-python 0.6.0py3-none-any.whl → 0.7.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

ai_lib_python/__init__.py +1 -1
ai_lib_python/computer_use/__init__.py +228 -0
ai_lib_python/drivers/__init__.py +140 -0
ai_lib_python/drivers/anthropic.py +173 -0
ai_lib_python/drivers/gemini.py +177 -0
ai_lib_python/drivers/openai.py +133 -0
ai_lib_python/mcp/__init__.py +181 -0
ai_lib_python/multimodal/__init__.py +138 -0
ai_lib_python/protocol/v2/__init__.py +22 -0
ai_lib_python/protocol/v2/capabilities.py +198 -0
ai_lib_python/protocol/v2/manifest.py +256 -0
ai_lib_python/registry/__init__.py +174 -0
{ai_lib_python-0.6.0.dist-info → ai_lib_python-0.7.0.dist-info}/METADATA +9 -4
{ai_lib_python-0.6.0.dist-info → ai_lib_python-0.7.0.dist-info}/RECORD +17 -6
{ai_lib_python-0.6.0.dist-info → ai_lib_python-0.7.0.dist-info}/WHEEL +0 -0
{ai_lib_python-0.6.0.dist-info → ai_lib_python-0.7.0.dist-info}/licenses/LICENSE-APACHE +0 -0
{ai_lib_python-0.6.0.dist-info → ai_lib_python-0.7.0.dist-info}/licenses/LICENSE-MIT +0 -0

ai_lib_python/__init__.py CHANGED Viewed

@@ -27,7 +27,7 @@ from ai_lib_python.types.message import (
 )
 from ai_lib_python.types.tool import ToolCall, ToolDefinition
-__version__ = "0.6.0"
+__version__ = "0.7.0"
 __all__ = [
     # Client

ai_lib_python/computer_use/__init__.py ADDED Viewed

@@ -0,0 +1,228 @@
+"""Computer Use 抽象层 — 提供跨厂商的 GUI 自动化操作标准化和安全控制。
+Computer Use abstraction layer for AI-Protocol. Provides:
+- Normalized action types across providers (screen_based, tool_based)
+- Safety policy enforcement (confirmation, sandbox, logging, domain allowlist)
+- Provider-specific configuration extraction
+- Action validation before execution
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any
+from urllib.parse import urlparse
+# ─── Normalized Action Types ────────────────────────────────────────────────
+class ActionType(str, Enum):
+    """Normalized computer use action types.
+    Each member's value is the provider-agnostic name of the action. Because
+    the class also derives from ``str``, members compare equal to those
+    plain strings.
+    """
+    # Screen capture
+    SCREENSHOT = "screenshot"
+    # Pointer actions
+    MOUSE_CLICK = "mouse_click"
+    MOUSE_DOUBLE_CLICK = "mouse_double_click"
+    MOUSE_DRAG = "mouse_drag"
+    SCROLL = "scroll"
+    MOUSE_MOVE = "mouse_move"
+    # Keyboard actions
+    KEYBOARD_TYPE = "keyboard_type"
+    KEYBOARD_SHORTCUT = "keyboard_shortcut"
+    # Browser actions (BROWSER_NAVIGATE is subject to the domain allowlist)
+    BROWSER_NAVIGATE = "browser_navigate"
+    BROWSER_CLICK_ELEMENT = "browser_click_element"
+    BROWSER_FILL_FORM = "browser_fill_form"
+    ZOOM_REGION = "zoom_region"
+    # File actions (subject to sensitive-path protection)
+    FILE_READ = "file_read"
+    FILE_WRITE = "file_write"
+class MouseButton(str, Enum):
+    """Mouse buttons accepted by ``ComputerAction.mouse_click``.
+    The member value is what gets stored under the ``button`` key of the
+    action's params.
+    """
+    LEFT = "left"
+    RIGHT = "right"
+    MIDDLE = "middle"
+@dataclass
+class ComputerAction:
+    """Provider-agnostic description of a single computer use action.
+    ``action_type`` selects the operation and ``params`` carries its
+    arguments. The class methods below are shortcuts that build the most
+    common actions with the expected parameter keys.
+    """
+    action_type: ActionType
+    params: dict[str, Any] = field(default_factory=dict)
+    # -- alternate constructors --
+    @classmethod
+    def screenshot(cls, fmt: str = "png") -> ComputerAction:
+        """Build a screenshot action requesting image format ``fmt``."""
+        return cls(action_type=ActionType.SCREENSHOT, params={"format": fmt})
+    @classmethod
+    def mouse_click(
+        cls, x: float, y: float, button: MouseButton = MouseButton.LEFT
+    ) -> ComputerAction:
+        """Build a click at ``(x, y)`` with ``button`` (stored by its string value)."""
+        click_params = {"x": x, "y": y, "button": button.value}
+        return cls(action_type=ActionType.MOUSE_CLICK, params=click_params)
+    @classmethod
+    def keyboard_type(cls, text: str) -> ComputerAction:
+        """Build an action that types ``text``."""
+        return cls(action_type=ActionType.KEYBOARD_TYPE, params={"text": text})
+    @classmethod
+    def keyboard_shortcut(cls, keys: list[str]) -> ComputerAction:
+        """Build a shortcut action for ``keys`` (the list is stored as given, not copied)."""
+        return cls(action_type=ActionType.KEYBOARD_SHORTCUT, params={"keys": keys})
+    @classmethod
+    def browser_navigate(cls, url: str) -> ComputerAction:
+        """Build a navigation action to ``url``."""
+        return cls(action_type=ActionType.BROWSER_NAVIGATE, params={"url": url})
+    @classmethod
+    def file_read(cls, path: str) -> ComputerAction:
+        """Build a read action for the file at ``path``."""
+        return cls(action_type=ActionType.FILE_READ, params={"path": path})
+    @classmethod
+    def file_write(cls, path: str, content: str) -> ComputerAction:
+        """Build a write action storing ``content`` at ``path``."""
+        write_params = {"path": path, "content": content}
+        return cls(action_type=ActionType.FILE_WRITE, params=write_params)
+class ImplementationStyle(str, Enum):
+    """Provider implementation approach.
+    Parsed from the ``implementation`` key of a manifest's computer use
+    section by ``extract_provider_config`` (default ``"screen_based"``).
+    """
+    SCREEN_BASED = "screen_based"
+    TOOL_BASED = "tool_based"
+    HYBRID = "hybrid"
+class SandboxMode(str, Enum):
+    """Sandbox setting carried by ``SafetyPolicy.sandbox_mode``.
+    Parsed from the ``sandbox_mode`` key of the safety configuration.
+    Nothing in this module acts on the value; it is stored as configuration.
+    """
+    REQUIRED = "required"
+    RECOMMENDED = "recommended"
+    OPTIONAL = "optional"
+# ─── Safety Policy ──────────────────────────────────────────────────────────
+class SafetyViolation(Exception):
+    """Raised when a computer use action violates the safety policy.
+    ``SafetyPolicy.validate_action`` raises it with a human-readable message
+    naming the limit, domain or path that was rejected.
+    """
+@dataclass
+class SafetyPolicy:
+    """Safety policy for computer use actions.
+    Loaded from the manifest's ``computer_use.safety`` configuration.
+    :meth:`validate_action` is meant to run *before* an action is
+    dispatched. It enforces the per-turn action limit, the navigation
+    domain allowlist and sensitive-path protection. The remaining fields
+    (confirmation, sandbox mode, logging, timeout) are carried as
+    configuration and are not checked by this class.
+    """
+    confirmation_required: bool = True
+    sandbox_mode: SandboxMode = SandboxMode.RECOMMENDED
+    action_logging: bool = True
+    # An empty set disables the domain check entirely.
+    domain_allowlist: set[str] = field(default_factory=set)
+    sensitive_data_protection: bool = True
+    # 0 means "no limit".
+    max_actions_per_turn: int = 0
+    action_timeout_ms: int = 30_000
+    @classmethod
+    def from_config(cls, safety_dict: dict[str, Any] | None) -> SafetyPolicy:
+        """Build a safety policy from a manifest's ``computer_use.safety`` dict.
+        A missing or empty dict yields the default policy. Missing keys fall
+        back to the field defaults. Raises ``ValueError`` if ``sandbox_mode``
+        is not a valid :class:`SandboxMode` value.
+        """
+        if not safety_dict:
+            return cls()
+        # NOTE(review): the key read here is ``domain_allowlist_entries`` while
+        # the field is ``domain_allowlist`` — confirm against the manifest schema.
+        # ``or []`` tolerates a key that is present but null.
+        allowlist_entries = safety_dict.get("domain_allowlist_entries") or []
+        return cls(
+            confirmation_required=safety_dict.get("confirmation_required", True),
+            sandbox_mode=SandboxMode(safety_dict.get("sandbox_mode", "recommended")),
+            action_logging=safety_dict.get("action_logging", True),
+            domain_allowlist=set(allowlist_entries),
+            sensitive_data_protection=safety_dict.get("sensitive_data_protection", True),
+            max_actions_per_turn=safety_dict.get("max_actions_per_turn", 0),
+            action_timeout_ms=safety_dict.get("action_timeout_ms", 30_000),
+        )
+    def validate_action(
+        self,
+        action: ComputerAction,
+        actions_this_turn: int = 0,
+    ) -> None:
+        """Validate an action against this policy. Raises :class:`SafetyViolation`.
+        ``actions_this_turn`` is the number of actions already performed in
+        the current turn, not counting ``action`` itself. Returns ``None``
+        when the action is allowed.
+        """
+        if self.max_actions_per_turn > 0 and actions_this_turn >= self.max_actions_per_turn:
+            raise SafetyViolation(
+                f"Max actions per turn exceeded: limit={self.max_actions_per_turn}, "
+                f"attempted={actions_this_turn + 1}"
+            )
+        if action.action_type == ActionType.BROWSER_NAVIGATE and self.domain_allowlist:
+            url = action.params.get("url", "")
+            domain = _extract_domain(url)
+            # Host names are case-insensitive and urlparse() reports them
+            # lower-cased, so compare against a lower-cased allowlist.
+            # Otherwise an entry such as "Example.com" could never match.
+            allowed = {entry.lower() for entry in self.domain_allowlist}
+            if domain.lower() not in allowed:
+                raise SafetyViolation(
+                    f"Domain '{domain}' is not in the allowlist: {sorted(self.domain_allowlist)}"
+                )
+        if self.sensitive_data_protection and action.action_type in (
+            ActionType.FILE_READ,
+            ActionType.FILE_WRITE,
+        ):
+            path = action.params.get("path", "")
+            if _is_sensitive_path(path):
+                raise SafetyViolation(f"Access to sensitive path '{path}' is blocked")
+# ─── Provider Configuration ─────────────────────────────────────────────────
+@dataclass
+class CuProviderConfig:
+    """Provider-specific computer use configuration.
+    Built by ``extract_provider_config`` from a manifest's computer use
+    section. All fields except ``implementation`` come from that section's
+    ``provider_mapping`` sub-dict.
+    """
+    # Tool type name to use for the provider's computer use tool.
+    tool_type: str = "computer_use"
+    # NOTE(review): presumably a beta opt-in header value some providers require — confirm.
+    beta_header: str | None = None
+    implementation: ImplementationStyle = ImplementationStyle.SCREEN_BASED
+    # NOTE(review): presumably a constraint on which models support the feature — confirm.
+    model_requirement: str | None = None
+def extract_provider_config(cu_config: dict[str, Any] | None) -> CuProviderConfig | None:
+    """Extract provider-specific CU configuration from a manifest section.
+    Returns ``None`` when the section is missing or empty, or when its
+    ``supported`` flag is falsy. Raises ``ValueError`` if ``implementation``
+    is not a valid :class:`ImplementationStyle` value.
+    """
+    if not cu_config or not cu_config.get("supported"):
+        return None
+    impl_str = cu_config.get("implementation", "screen_based")
+    implementation = ImplementationStyle(impl_str)
+    # ``provider_mapping`` may be present but null (e.g. an empty YAML key).
+    # Treat that like an absent mapping instead of failing on ``None.get``.
+    mapping = cu_config.get("provider_mapping") or {}
+    return CuProviderConfig(
+        tool_type=mapping.get("tool_type", "computer_use"),
+        beta_header=mapping.get("beta_header"),
+        implementation=implementation,
+        model_requirement=mapping.get("model_requirement"),
+    )
+# ─── Helpers ────────────────────────────────────────────────────────────────
+# Lower-case substrings checked by _is_sensitive_path: a path containing any
+# of them (anywhere, not only as a whole component) is treated as sensitive.
+_SENSITIVE_PATTERNS = (
+    ".ssh", ".gnupg", ".aws", "credentials", "secrets",
+    ".env", "password", "token", ".kube/config",
+)
+def _extract_domain(url: str) -> str:
+    """Return the lower-cased host name of ``url``, or ``""`` if it has none.
+    URLs without a ``scheme://`` prefix have no host component and yield
+    ``""``. A URL that ``urlparse`` rejects as malformed also yields ``""``,
+    so the allowlist check fails closed instead of trusting a guessed host.
+    """
+    try:
+        return urlparse(url).hostname or ""
+    except ValueError:
+        # e.g. an unbalanced "[" in the authority. Guessing the host by string
+        # splitting would let "http://allowed.com:x@[bad" pass as "allowed.com".
+        return ""
+def _is_sensitive_path(path: str) -> bool:
+    """Return True if ``path`` contains any entry of ``_SENSITIVE_PATTERNS``.
+    Matching is a case-insensitive substring test. Backslashes are treated
+    as path separators so that Windows-style paths cannot slip past
+    patterns that contain a forward slash (such as ``.kube/config``).
+    """
+    normalized = path.lower().replace("\\", "/")
+    return any(pattern in normalized for pattern in _SENSITIVE_PATTERNS)
+# Public API of the computer_use package; the underscore-prefixed helpers
+# (_extract_domain, _is_sensitive_path, _SENSITIVE_PATTERNS) are not exported.
+__all__ = [
+    "ActionType",
+    "ComputerAction",
+    "CuProviderConfig",
+    "ImplementationStyle",
+    "MouseButton",
+    "SafetyPolicy",
+    "SafetyViolation",
+    "SandboxMode",
+    "extract_provider_config",
+]

ai_lib_python/drivers/__init__.py ADDED Viewed

@@ -0,0 +1,140 @@
+"""Provider 驱动抽象层 — 通过 ABC 实现多厂商 API 适配的动态分发。
+Provider driver abstraction layer implementing the ProviderContract specification.
+Uses abstract base class + factory for runtime polymorphism, enabling the same
+client code to work with OpenAI, Anthropic, Gemini, and any compatible provider.
+"""
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Any
+from ai_lib_python.protocol.v2.capabilities import Capability
+from ai_lib_python.protocol.v2.manifest import ApiStyle
+from ai_lib_python.types.events import StreamingEvent
+from ai_lib_python.types.message import Message
+@dataclass
+class DriverRequest:
+    """Unified HTTP request representation for provider communication.
+    This is the return type of ``ProviderDriver.build_request``.
+    """
+    # Target URL; empty by default.
+    # NOTE(review): presumably filled in by the caller/transport layer — confirm.
+    url: str = ""
+    # HTTP method.
+    method: str = "POST"
+    # Provider-specific request headers.
+    headers: dict[str, str] = field(default_factory=dict)
+    # Request payload as a plain dict.
+    body: dict[str, Any] = field(default_factory=dict)
+    # Whether a streaming response is requested.
+    stream: bool = False
+@dataclass
+class DriverResponse:
+    """Unified chat response from provider.
+    This is the return type of ``ProviderDriver.parse_response``.
+    """
+    # Text content of the reply, or None when there is none.
+    content: str | None = None
+    # Reason the generation finished, or None when not reported.
+    finish_reason: str | None = None
+    # ``UsageInfo`` is defined further down; the forward reference works because
+    # this module uses ``from __future__ import annotations``.
+    usage: UsageInfo | None = None
+    # Tool calls as dicts; their shape is left to the individual driver.
+    tool_calls: list[dict[str, Any]] = field(default_factory=list)
+    # The provider response body this object was parsed from.
+    raw: dict[str, Any] = field(default_factory=dict)
+@dataclass
+class UsageInfo:
+    """Token usage information.
+    All counters default to 0; drivers fill them in from the provider's
+    usage payload.
+    """
+    # Tokens consumed by the input/prompt.
+    prompt_tokens: int = 0
+    # Tokens produced in the completion.
+    completion_tokens: int = 0
+    # Stored explicitly; this class does not compute it from the two fields above.
+    total_tokens: int = 0
+class ProviderDriver(ABC):
+    """Core abstract class for provider-specific API adaptation.
+    Each provider API style (OpenAI, Anthropic, Gemini) has a concrete
+    implementation. The runtime selects the correct driver based on the
+    manifest's ``api_style`` or ``provider_contract``.
+    Every member below is abstract, so a subclass must implement all of
+    them before it can be instantiated.
+    """
+    @property
+    @abstractmethod
+    def provider_id(self) -> str:
+        """Unique provider identifier (matches manifest ``id``)."""
+    @property
+    @abstractmethod
+    def api_style(self) -> ApiStyle:
+        """API style this driver implements."""
+    @abstractmethod
+    def build_request(
+        self,
+        messages: list[Message],
+        model: str,
+        *,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        stream: bool = False,
+        extra: dict[str, Any] | None = None,
+    ) -> DriverRequest:
+        """Build a provider-specific HTTP request from unified parameters.
+        ``messages`` and ``model`` are positional. The sampling options,
+        ``stream`` and ``extra`` are keyword-only. ``None`` for an option
+        means the caller did not specify it. How ``extra`` is applied is up
+        to the concrete driver.
+        """
+    @abstractmethod
+    def parse_response(self, body: dict[str, Any]) -> DriverResponse:
+        """Parse a non-streaming response into unified format.
+        ``body`` is the provider response already decoded into a dict.
+        """
+    @abstractmethod
+    def parse_stream_event(self, data: str) -> StreamingEvent | None:
+        """Parse a single streaming event from raw SSE/NDJSON data.
+        Returns ``None`` when ``data`` does not produce an event.
+        """
+    @abstractmethod
+    def supported_capabilities(self) -> list[Capability]:
+        """Get the list of capabilities this driver supports."""
+    @abstractmethod
+    def is_stream_done(self, data: str) -> bool:
+        """Check if the done signal has been received in streaming.
+        ``data`` is the same raw event data given to ``parse_stream_event``.
+        """
+# ---------------------------------------------------------------------------
+# Concrete drivers (imported here, after the base classes above are defined, to avoid circular imports)
+# ---------------------------------------------------------------------------
+from ai_lib_python.drivers.anthropic import AnthropicDriver  # noqa: E402
+from ai_lib_python.drivers.gemini import GeminiDriver  # noqa: E402
+from ai_lib_python.drivers.openai import OpenAiDriver  # noqa: E402
+def create_driver(
+    api_style: ApiStyle,
+    provider_id: str,
+    capabilities: list[Capability] | None = None,
+) -> ProviderDriver:
+    """Factory: instantiate the driver that implements ``api_style``.
+    The Anthropic and Gemini styles get their dedicated drivers. Every
+    other style — OpenAI-compatible, ``Custom`` and anything unrecognized —
+    is served by the OpenAI driver, because most providers (DeepSeek,
+    Moonshot, Zhipu, etc.) follow the OpenAI chat completions format.
+    ``capabilities=None`` is passed on as an empty list.
+    """
+    # Pick the class first so the constructor call is written only once.
+    if api_style == ApiStyle.ANTHROPIC_MESSAGES:
+        driver_cls = AnthropicDriver
+    elif api_style == ApiStyle.GEMINI_GENERATE:
+        driver_cls = GeminiDriver
+    else:
+        driver_cls = OpenAiDriver
+    return driver_cls(provider_id=provider_id, capabilities=capabilities or [])
+# Public API of the drivers package: the unified request/response types, the
+# abstract base class, the three concrete drivers and the factory.
+__all__ = [
+    "AnthropicDriver",
+    "DriverRequest",
+    "DriverResponse",
+    "GeminiDriver",
+    "OpenAiDriver",
+    "ProviderDriver",
+    "UsageInfo",
+    "create_driver",
+]

ai_lib_python/drivers/anthropic.py ADDED Viewed

@@ -0,0 +1,173 @@
+"""Anthropic Messages API 驱动 — 实现 Anthropic 特有的请求/响应格式转换。
+Anthropic Messages API driver. Key differences from OpenAI:
+- System messages are a top-level ``system`` parameter, not part of ``messages``.
+- Content uses typed blocks: ``[{"type": "text", "text": "..."}]``.
+- Streaming uses ``event: content_block_delta`` with ``delta.text``.
+- Response uses ``content[0].text`` instead of ``choices[0].message.content``.
+- ``max_tokens`` is required, not optional.
+"""
+from __future__ import annotations
+import json
+from typing import Any
+from ai_lib_python.drivers import (
+    DriverRequest,
+    DriverResponse,
+    ProviderDriver,
+    UsageInfo,
+)
+from ai_lib_python.protocol.v2.capabilities import Capability
+from ai_lib_python.protocol.v2.manifest import ApiStyle
+from ai_lib_python.types.events import StreamingEvent
+from ai_lib_python.types.message import Message
+# Fallback for ``max_tokens`` when the caller gives none; the module docstring
+# notes that Anthropic requires the field.
+_DEFAULT_MAX_TOKENS = 4096
+# Anthropic stop_reason → AI-Protocol normalized finish_reason.
+# Reasons not listed here are passed through unchanged by the driver.
+# NOTE(review): "stop_sequence" is not mapped and so surfaces as-is — confirm that is intended.
+_STOP_REASON_MAP: dict[str, str] = {
+    "end_turn": "stop",
+    "max_tokens": "length",
+    "tool_use": "tool_calls",
+}
+class AnthropicDriver(ProviderDriver):
+    """Anthropic Messages API driver.
+    Translates unified messages into an Anthropic request body and maps
+    Anthropic responses and stream events back to the unified driver types.
+    """
+    def __init__(
+        self,
+        provider_id: str,
+        capabilities: list[Capability] | None = None,
+    ) -> None:
+        """Store the provider id and the capabilities to advertise (``None`` means none)."""
+        self._provider_id = provider_id
+        self._capabilities = capabilities or []
+    @property
+    def provider_id(self) -> str:
+        """Provider identifier given at construction."""
+        return self._provider_id
+    @property
+    def api_style(self) -> ApiStyle:
+        """Always ``ApiStyle.ANTHROPIC_MESSAGES``."""
+        return ApiStyle.ANTHROPIC_MESSAGES
+    def build_request(
+        self,
+        messages: list[Message],
+        model: str,
+        *,
+        temperature: float | None = None,
+        max_tokens: int | None = None,
+        stream: bool = False,
+        extra: dict[str, Any] | None = None,
+    ) -> DriverRequest:
+        """Build the Anthropic request body and headers.
+        System messages are lifted into the top-level ``system`` field.
+        ``max_tokens`` falls back to ``_DEFAULT_MAX_TOKENS`` when it is
+        ``None`` or 0. Keys in ``extra`` are merged last and therefore
+        override anything set here. ``url`` is left at its default on the
+        returned request.
+        """
+        system_text, msgs = self._split_system(messages)
+        body: dict[str, Any] = {
+            "model": model,
+            "messages": msgs,
+            "max_tokens": max_tokens or _DEFAULT_MAX_TOKENS,
+            "stream": stream,
+        }
+        if system_text:
+            body["system"] = system_text
+        if temperature is not None:
+            body["temperature"] = temperature
+        if extra:
+            body.update(extra)
+        headers = {"anthropic-version": "2023-06-01"}
+        return DriverRequest(body=body, stream=stream, headers=headers)
+    def parse_response(self, body: dict[str, Any]) -> DriverResponse:
+        """Parse a non-streaming Messages response into a ``DriverResponse``.
+        ``content`` is the text of the first ``text`` block (``None`` if
+        there is none), ``tool_calls`` holds the raw ``tool_use`` blocks and
+        ``finish_reason`` is the normalized ``stop_reason`` (``None`` when
+        absent). ``usage`` is ``None`` when the response has no usage data.
+        """
+        # content: [{type: "text", text: "..."}]; an explicit null is treated as empty.
+        content_blocks = body.get("content") or []
+        text = next(
+            (b["text"] for b in content_blocks if b.get("type") == "text"),
+            None,
+        )
+        # Normalize stop_reason; unknown reasons pass through unchanged.
+        raw_reason = body.get("stop_reason", "")
+        finish_reason = _STOP_REASON_MAP.get(raw_reason, raw_reason) or None
+        usage_raw = body.get("usage")
+        usage = None
+        if usage_raw:
+            inp = usage_raw.get("input_tokens", 0)
+            out = usage_raw.get("output_tokens", 0)
+            # The total is computed here as the sum of input and output tokens.
+            usage = UsageInfo(prompt_tokens=inp, completion_tokens=out, total_tokens=inp + out)
+        tool_calls = [b for b in content_blocks if b.get("type") == "tool_use"]
+        return DriverResponse(
+            content=text,
+            finish_reason=finish_reason,
+            usage=usage,
+            tool_calls=tool_calls,
+            raw=body,
+        )
+    def parse_stream_event(self, data: str) -> StreamingEvent | None:
+        """Parse one streamed JSON payload into a ``StreamingEvent``.
+        Returns ``None`` for blank data, for event types not handled here,
+        and for deltas that carry neither text nor thinking. Raises
+        ``json.JSONDecodeError`` if ``data`` is not valid JSON.
+        """
+        stripped = data.strip()
+        if not stripped:
+            return None
+        chunk = json.loads(stripped)
+        event_type = chunk.get("type", "")
+        if event_type == "content_block_delta":
+            delta = chunk.get("delta", {})
+            if text := delta.get("text"):
+                # NOTE(review): the chunk's ``index`` is passed through as
+                # sequence_id — confirm it is the intended ordering key.
+                seq = chunk.get("index")
+                return StreamingEvent.content_delta(text, sequence_id=seq)
+            if thinking := delta.get("thinking"):
+                return StreamingEvent.thinking_delta(thinking)
+            return None
+        if event_type == "message_delta":
+            reason = chunk.get("delta", {}).get("stop_reason")
+            if reason:
+                return StreamingEvent.stream_end(_STOP_REASON_MAP.get(reason, reason))
+            return None
+        if event_type == "message_stop":
+            # NOTE(review): a stream that sends both a message_delta with a
+            # stop_reason and a message_stop yields two end events — confirm
+            # consumers tolerate that.
+            return StreamingEvent.stream_end("stop")
+        if event_type == "error":
+            return StreamingEvent.stream_error(chunk.get("error"))
+        return None
+    def supported_capabilities(self) -> list[Capability]:
+        """Return a copy of the capabilities given at construction."""
+        return list(self._capabilities)
+    def is_stream_done(self, _data: str) -> bool:
+        """Always ``False``: completion is reported through ``parse_stream_event``."""
+        # Anthropic signals done via event type, not a sentinel string.
+        return False
+    # -- internal helpers ------------------------------------------------
+    @staticmethod
+    def _split_system(messages: list[Message]) -> tuple[str | None, list[dict[str, Any]]]:
+        """Extract system text and convert remaining messages to Anthropic format.
+        Returns ``(system_text, msgs)``. ``system_text`` joins all system
+        messages with blank lines, or is ``None`` when there are none.
+        ``msgs`` holds the other messages with content as typed blocks.
+        """
+        system_parts: list[str] = []
+        msgs: list[dict[str, Any]] = []
+        for m in messages:
+            role = m.role if isinstance(m.role, str) else m.role.value
+            if role == "system":
+                if isinstance(m.content, str):
+                    system_parts.append(m.content)
+                elif m.content:
+                    # Block-form system content: keep its text blocks rather
+                    # than silently dropping the system prompt.
+                    for block in m.content:
+                        dumped = block.model_dump(by_alias=True)
+                        block_text = dumped.get("text")
+                        if dumped.get("type") == "text" and isinstance(block_text, str) and block_text:
+                            system_parts.append(block_text)
+                continue
+            if isinstance(m.content, str):
+                content: Any = [{"type": "text", "text": m.content}]
+            else:
+                content = [b.model_dump(by_alias=True) for b in m.content]
+            msgs.append({"role": role, "content": content})
+        system_text = "\n\n".join(system_parts) if system_parts else None
+        return system_text, msgs

ai-lib-python 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

ai-lib-python 0.6.0py3-none-any.whl → 0.7.0py3-none-any.whl