PyPI - deepy-cli - Versions diffs - 0.2.25__tar.gz → 0.2.26__tar.gz - Mend

deepy-cli 0.2.25__tar.gz → 0.2.26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (113) hide show

{deepy_cli-0.2.25 → deepy_cli-0.2.26}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: deepy-cli
-Version: 0.2.25
+Version: 0.2.26
 Summary: Deepy - Vibe coding for DeepSeek models in your terminal
 Keywords: deepseek,coding-agent,terminal,cli,agents
 Author: kirineko

{deepy_cli-0.2.25 → deepy_cli-0.2.26}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "deepy-cli"
-version = "0.2.25"
+version = "0.2.26"
 description = "Deepy - Vibe coding for DeepSeek models in your terminal"
 readme = "README.md"
 authors = [

{deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/__init__.py RENAMED Viewed

@@ -1,6 +1,6 @@
 from __future__ import annotations
-__version__ = "0.2.25"
+__version__ = "0.2.26"
 def main() -> None:

{deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/cli.py RENAMED Viewed

@@ -30,6 +30,7 @@ from .config import (
 from .config.settings import DEFAULT_UI_THEME, UI_THEMES
 from .errors import format_error_display
 from .llm.cache_context import format_cache_usage
+from .llm.multimodal import redact_image_data_urls
 from .llm.provider import build_provider_bundle
 from .llm.runner import DEFAULT_MAX_TURNS, run_prompt_once
 from .sessions import DeepySession, list_session_entries
@@ -614,7 +615,7 @@ def _cmd_sessions(args: argparse.Namespace) -> int:
                     else 0,
                     "cache_break_reason": entry.cache_break_reason if entry is not None else None,
                     "cache_usage": entry.cache_usage if entry is not None else None,
-                    "items": items,
+                    "items": redact_image_data_urls(items),
                 }
             )
         )

{deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/config/settings.py RENAMED Viewed

@@ -59,6 +59,7 @@ class ModelInfo:
     label: str
     description: str
     supports_thinking: bool = True
+    supports_image_input: bool = False
     default_reasoning_mode: str = "max"
@@ -102,6 +103,7 @@ OPENROUTER_MODEL_CATALOG = (
         name="xiaomi/mimo-v2.5",
         label="MiMo V2.5",
         description="Xiaomi MiMo V2.5 via OpenRouter.",
+        supports_image_input=True,
         default_reasoning_mode="enabled",
     ),
 )
@@ -116,6 +118,7 @@ XIAOMI_MODEL_CATALOG = (
         name="mimo-v2.5",
         label="MiMo V2.5",
         description="Xiaomi official MiMo V2.5.",
+        supports_image_input=True,
         default_reasoning_mode="enabled",
     ),
 )

{deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/tools/test_shell.md RENAMED Viewed

@@ -11,6 +11,8 @@ redirection, command substitution, heredocs, and background operators.
 Low-risk verification commands run immediately and return command, cwd,
 exit-code, elapsed time, stdout, stderr, and truncation metadata. Medium-risk
-commands return `approval_required` with an `approvalToken`; the main Deepy
-agent must ask the user before retrying the same command with that token.
-Destructive, publishing, mutating, or unsupported commands are denied.
+commands are routed through Deepy's outer audit approval flow when an audit
+policy is active; after approval they still execute through this constrained
+tool. Without an active audit policy, medium-risk commands return
+`approval_required` with an `approvalToken` for same-command retry. Destructive,
+publishing, mutating, or unsupported commands are denied.

{deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/llm/compaction.py RENAMED Viewed

@@ -132,7 +132,7 @@ async def ensure_context_ready(
     prefix_snapshot: CachePrefixSnapshot | None = None,
     prefix_tools: list[Any] | None = None,
     prefix_mcp_servers: list[Any] | None = None,
-    additional_input: str | None = None,
+    additional_input: Any | None = None,
 ) -> ContextReadiness:
     additional_tokens = estimate_tokens_for_item(additional_input or "")
     state = session.context_token_state()

{deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/llm/context.py RENAMED Viewed

@@ -4,6 +4,8 @@ from math import ceil
 from typing import Any
 from deepy.config import Settings
+from deepy.llm.multimodal import item_contains_image_content, strip_image_content_from_items
+from deepy.llm.multimodal import supports_image_input
 from deepy.types.sdk import SessionInputCallback
 from deepy.utils import json as json_utils
@@ -28,6 +30,8 @@ def estimate_tokens_for_text(text: str) -> int:
 def estimate_tokens_for_item(item: Any) -> int:
+    if item_contains_image_content(item):
+        return _estimate_multimodal_item_tokens(item)
     if isinstance(item, str):
         return estimate_tokens_for_text(item)
     if isinstance(item, dict):
@@ -37,13 +41,34 @@ def estimate_tokens_for_item(item: Any) -> int:
     return estimate_tokens_for_text(str(item))
+def _estimate_multimodal_item_tokens(item: Any) -> int:
+    if not isinstance(item, dict):
+        return estimate_tokens_for_text(str(item))
+    content = item.get("content")
+    if not isinstance(content, list):
+        return estimate_tokens_for_text(json_utils.dumps(item))
+    tokens = 0
+    for part in content:
+        if not isinstance(part, dict):
+            tokens += estimate_tokens_for_item(part)
+            continue
+        if part.get("type") in {"input_image", "image", "image_url"} or "image_url" in part:
+            tokens += 1024
+            continue
+        tokens += estimate_tokens_for_item(part)
+    return max(tokens, 1)
 def estimate_tokens_for_items(items: list[dict[str, Any]]) -> int:
     return sum(estimate_tokens_for_item(item) for item in items)
 def build_session_input_callback(settings: Settings) -> SessionInputCallback:
     def callback(history: list[Any], new_input: list[Any]) -> list[Any]:
-        return [*history, *new_input]
+        items = [*history, *new_input]
+        if not supports_image_input(settings):
+            return strip_image_content_from_items(items)
+        return items
     return callback

deepy_cli-0.2.26/src/deepy/llm/multimodal.py ADDED Viewed

@@ -0,0 +1,279 @@
+from __future__ import annotations
+import base64
+import re
+from dataclasses import dataclass
+from typing import Any
+from deepy.config import Settings
+SUPPORTED_IMAGE_MIME_TYPES = frozenset(
+    {
+        "image/png",
+        "image/jpeg",
+        "image/webp",
+        "image/gif",
+    }
+)
+DEFAULT_MAX_IMAGE_BYTES = 50 * 1024 * 1024
+UNSUPPORTED_IMAGE_INPUT_MESSAGE = "当前模型不支持图片输入，已忽略粘贴的图片。"
+IMAGE_ONLY_DEFAULT_TEXT = "请描述这张图片的内容，不要执行工具或修改文件。"
+IMAGE_DATA_URL_RE = re.compile(r"^data:image/[a-zA-Z0-9.+-]+;base64,", re.IGNORECASE)
+class ImageAttachmentError(ValueError):
+    pass
+class UnsupportedImageInputError(RuntimeError):
+    pass
+@dataclass(frozen=True)
+class PromptImageAttachment:
+    label: str
+    mime_type: str
+    data_base64: str
+    byte_size: int
+    source: str = "clipboard"
+    data_ref: str | None = None
+    @property
+    def display_label(self) -> str:
+        return f"[{self.label}]"
+    @property
+    def data_url(self) -> str:
+        return f"data:{self.mime_type};base64,{self.data_base64}"
+    def to_input_image_block(self) -> dict[str, str]:
+        return {"type": "input_image", "image_url": self.data_url}
+def supports_image_input(settings: Settings) -> bool:
+    return model_supports_image_input(settings.model.provider, settings.model.name)
+def model_supports_image_input(provider: str, model: str) -> bool:
+    normalized_provider = provider.strip().lower()
+    normalized_model = model.strip().lower()
+    if normalized_provider == "xiaomi":
+        return normalized_model == "mimo-v2.5"
+    if normalized_provider == "openrouter":
+        return normalized_model == "xiaomi/mimo-v2.5"
+    return False
+def validate_image_attachment(
+    *,
+    mime_type: str,
+    byte_size: int,
+    max_bytes: int = DEFAULT_MAX_IMAGE_BYTES,
+) -> None:
+    normalized_mime = mime_type.strip().lower()
+    if normalized_mime not in SUPPORTED_IMAGE_MIME_TYPES:
+        raise ImageAttachmentError(f"不支持的图片格式：{mime_type or 'unknown'}")
+    if byte_size <= 0:
+        raise ImageAttachmentError("图片为空，已忽略粘贴的图片。")
+    if byte_size > max_bytes:
+        raise ImageAttachmentError("图片过大，已忽略粘贴的图片。")
+def build_prompt_image_attachment(
+    *,
+    data: bytes,
+    mime_type: str,
+    index: int,
+    source: str = "clipboard",
+    max_bytes: int = DEFAULT_MAX_IMAGE_BYTES,
+) -> PromptImageAttachment:
+    normalized_mime = mime_type.strip().lower()
+    validate_image_attachment(
+        mime_type=normalized_mime,
+        byte_size=len(data),
+        max_bytes=max_bytes,
+    )
+    return PromptImageAttachment(
+        label=f"图片{index}",
+        mime_type=normalized_mime,
+        data_base64=base64.b64encode(data).decode("ascii"),
+        byte_size=len(data),
+        source=source,
+    )
+def image_attachment_labels(attachments: list[PromptImageAttachment]) -> str:
+    return " ".join(attachment.display_label for attachment in attachments)
+def format_user_prompt_display(prompt: str, attachments: list[PromptImageAttachment]) -> str:
+    labels = image_attachment_labels(attachments)
+    text = prompt.strip()
+    if text and labels:
+        return f"{text}\n{labels}"
+    return text or labels
+def build_user_input(
+    prompt: str,
+    attachments: list[PromptImageAttachment] | None = None,
+) -> str | list[dict[str, Any]]:
+    image_attachments = list(attachments or [])
+    if not image_attachments:
+        return prompt
+    content: list[dict[str, str]] = []
+    if prompt.strip():
+        content.append({"type": "input_text", "text": prompt})
+    content.extend(attachment.to_input_image_block() for attachment in image_attachments)
+    return [{"role": "user", "content": content}]
+def item_contains_image_content(item: Any) -> bool:
+    if isinstance(item, dict):
+        return _content_contains_image(item.get("content"))
+    return _content_contains_image(getattr(item, "content", None))
+def items_contain_image_content(items: list[Any]) -> bool:
+    return any(item_contains_image_content(item) for item in items)
+def strip_image_content_from_items(items: list[Any]) -> list[Any]:
+    stripped: list[Any] = []
+    for item in items:
+        cleaned = strip_image_content_from_item(item)
+        if cleaned is not None:
+            stripped.append(cleaned)
+    return stripped
+def strip_image_content_from_item(item: Any) -> Any | None:
+    if not isinstance(item, dict) or "content" not in item:
+        return item
+    cleaned = dict(item)
+    content = _strip_image_content(item.get("content"))
+    if content is None:
+        return None
+    cleaned["content"] = content
+    return cleaned
+def redacted_content_text(value: Any) -> str:
+    if isinstance(value, str):
+        return _redact_data_urls(value)
+    if isinstance(value, list):
+        parts: list[str] = []
+        image_index = 1
+        for part in value:
+            if not isinstance(part, dict):
+                continue
+            if _part_is_image(part):
+                parts.append(f"[图片{image_index}]")
+                image_index += 1
+                continue
+            text = _text_part(part)
+            if text:
+                parts.append(_redact_data_urls(text))
+        return "\n".join(parts)
+    if isinstance(value, dict):
+        if _part_is_image(value):
+            return "[图片1]"
+        text = _text_part(value)
+        return _redact_data_urls(text) if text else ""
+    return "" if value is None else _redact_data_urls(str(value))
+def redact_image_data_urls(value: Any) -> Any:
+    if isinstance(value, str):
+        return _redact_data_urls(value)
+    if isinstance(value, list):
+        return [redact_image_data_urls(item) for item in value]
+    if isinstance(value, dict):
+        redacted = {key: redact_image_data_urls(item) for key, item in value.items()}
+        if _part_is_image(redacted):
+            if isinstance(redacted.get("image_url"), str):
+                redacted["image_url"] = "[图片]"
+            elif isinstance(redacted.get("image_url"), dict):
+                image_url = dict(redacted["image_url"])
+                image_url["url"] = "[图片]"
+                redacted["image_url"] = image_url
+        return redacted
+    return value
+def normalize_multimodal_content_blocks(content: Any) -> Any:
+    if not isinstance(content, list):
+        return content
+    normalized: list[Any] = []
+    has_text = False
+    has_image = False
+    for part in content:
+        if not isinstance(part, dict):
+            normalized.append(part)
+            continue
+        part_type = part.get("type")
+        if part_type == "input_text":
+            text = part.get("text")
+            if text is None:
+                text = part.get("input_text")
+            text_value = text if isinstance(text, str) else ""
+            has_text = has_text or bool(text_value.strip())
+            normalized.append({"type": "text", "text": text_value})
+            continue
+        if part_type == "input_image":
+            image_url = part.get("image_url")
+            has_image = True
+            normalized.append(
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": image_url if isinstance(image_url, str) else "",
+                    },
+                }
+            )
+            continue
+        if _part_is_image(part):
+            has_image = True
+        elif (text := _text_part(part)).strip():
+            has_text = True
+        normalized.append(part)
+    if has_image and not has_text:
+        normalized.insert(0, {"type": "text", "text": IMAGE_ONLY_DEFAULT_TEXT})
+    return normalized
+def _content_contains_image(content: Any) -> bool:
+    if isinstance(content, list):
+        return any(isinstance(part, dict) and _part_is_image(part) for part in content)
+    return isinstance(content, dict) and _part_is_image(content)
+def _strip_image_content(content: Any) -> Any | None:
+    if isinstance(content, list):
+        parts = [
+            part
+            for part in content
+            if not (isinstance(part, dict) and _part_is_image(part))
+        ]
+        return parts or None
+    if isinstance(content, dict) and _part_is_image(content):
+        return None
+    return content
+def _part_is_image(part: dict[str, Any]) -> bool:
+    part_type = part.get("type")
+    return part_type in {"input_image", "image", "image_url"} or "image_url" in part
+def _text_part(part: dict[str, Any]) -> str:
+    for key in ("text", "input_text", "output_text", "refusal"):
+        value = part.get(key)
+        if isinstance(value, str):
+            return value
+    return ""
+def _redact_data_urls(text: str) -> str:
+    return IMAGE_DATA_URL_RE.sub("data:image/...;base64,", text)

{deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/llm/provider.py RENAMED Viewed

@@ -8,8 +8,14 @@ from agents import Model, ModelSettings
 from agents import OpenAIChatCompletionsModel
 from deepy.config import Settings
+from deepy.config.settings import infer_provider_from_base_url
 from .cache_context import capture_sdk_request_shape
+from .multimodal import (
+    items_contain_image_content,
+    model_supports_image_input,
+    strip_image_content_from_items,
+)
 from .replay import (
     sanitize_chat_completion_stream_event,
     sanitize_model_input_for_chat_completions,
@@ -43,10 +49,21 @@ class DeepyOpenAIChatCompletionsModel(OpenAIChatCompletionsModel):
         *args: Any,
         **kwargs: Any,
     ) -> Any:
+        model_name = str(getattr(self, "model", ""))
+        base_url = str(getattr(self._get_client(), "base_url", "") or "")
+        inferred_provider = infer_provider_from_base_url(base_url) or (
+            "openrouter" if _is_openrouter_base_url(base_url) else ""
+        )
+        if (
+            isinstance(input, list)
+            and items_contain_image_content(input)
+            and not model_supports_image_input(inferred_provider, model_name)
+        ):
+            input = strip_image_content_from_items(input)
         capture_sdk_request_shape(
             system_instructions=system_instructions,
             input=input,
-            model=str(getattr(self, "model", "")),
+            model=model_name,
             model_settings=args[0] if args else None,
             tools=args[1] if len(args) > 1 and isinstance(args[1], list) else None,
             mcp_servers=None,

{deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/llm/replay.py RENAMED Viewed

@@ -4,6 +4,8 @@ from collections import Counter
 from collections.abc import Iterable
 from typing import Any, cast
+from deepy.llm.multimodal import normalize_multimodal_content_blocks
 def sanitize_model_input_for_chat_completions(input_value: Any) -> Any:
     if not isinstance(input_value, list):
@@ -92,10 +94,18 @@ def _normalize_chat_tool_items(items: Iterable[Any]) -> list[Any]:
                     }
                 )
                 continue
-        normalized.append(item)
+        normalized.append(_normalize_multimodal_item(item))
     return normalized
+def _normalize_multimodal_item(item: Any) -> Any:
+    if isinstance(item, dict) and "content" in item:
+        normalized = dict(item)
+        normalized["content"] = normalize_multimodal_content_blocks(item.get("content"))
+        return normalized
+    return item
 def sanitize_chat_completion_stream_event(event: Any) -> Any | None:
     if getattr(event, "type", None) == "response.output_item.done" and _is_empty_assistant_message(
         getattr(event, "item", None)

{deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/llm/runner.py RENAMED Viewed

@@ -30,6 +30,11 @@ from .cache_context import (
 from .compaction import ContextCompactionError, ensure_context_ready
 from .context import build_session_input_callback
 from .events import DeepyStreamEvent, normalize_stream_event
+from .multimodal import (
+    PromptImageAttachment,
+    build_user_input,
+    supports_image_input,
+)
 from .provider import ProviderBundle, build_provider_bundle
 DEFAULT_MAX_TURNS = 100
@@ -73,6 +78,7 @@ async def run_prompt_once(
         list[ApprovalDecision] | Awaitable[list[ApprovalDecision]],
     ]
     | None = None,
+    image_attachments: list[PromptImageAttachment] | None = None,
 ) -> RunSummary:
     from agents import RunConfig, Runner
     from agents.exceptions import MaxTurnsExceeded, ModelBehaviorError
@@ -86,6 +92,9 @@ async def run_prompt_once(
     )
     audit_policy = AuditPolicy(lambda: audit_state.mode, resolved_settings.audit)
     session = DeepySession.open(root, session_id) if session_id else DeepySession.create(root)
+    effective_image_attachments = (
+        list(image_attachments or []) if supports_image_input(resolved_settings) else []
+    )
     initial_todos, _ = normalize_todo_items(session.todo_state())
     runtime = ToolRuntime(
         cwd=root,
@@ -134,7 +143,7 @@ async def run_prompt_once(
             prefix_snapshot=prefix_snapshot,
             prefix_tools=list(getattr(agent, "tools", []) or []),
             prefix_mcp_servers=list(getattr(agent, "mcp_servers", []) or []),
-            additional_input=prompt,
+            additional_input=build_user_input(prompt, effective_image_attachments),
         )
     except ContextCompactionError as exc:
         duration_ms = int((time.time() - started_at) * 1000) if "started_at" in locals() else 0
@@ -175,7 +184,7 @@ async def run_prompt_once(
     prefix_token: Any | None = None
     try:
         prefix_token = set_current_cache_prefix_snapshot(prefix_snapshot)
-        run_input: Any = prompt
+        run_input: Any = build_user_input(prompt, effective_image_attachments)
         while True:
             result = Runner.run_streamed(
                 agent,
@@ -527,7 +536,7 @@ def _approval_server_name(raw_item: Any, tool_name: str) -> str:
 def _approval_action_kind(tool_name: str) -> str:
     if tool_name in {"Write", "Update"}:
         return "text_write"
-    if tool_name == "shell":
+    if tool_name in {"shell", "test_shell"}:
         return "command"
     if tool_name == "task_stop":
         return "background_task_control"

{deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/prompts/system.py RENAMED Viewed

@@ -66,10 +66,9 @@ Core rules:
   `subagent_explore` for broad read-only investigation, `subagent_reviewer` for
   focused review, and `subagent_tester` for reproduction or verification. Keep
   Deepy responsible for final synthesis and do not delegate tiny one-step work.
-- If a subagent reports `test_shell` `approval_required`, ask the user through
-  `AskUserQuestion` with the exact command, policy reason, and approval token.
-  Retry only the same command through the constrained `test_shell` path after
-  the user approves; do not broaden access to raw shell.
+- If a subagent runs `test_shell`, let medium-risk command approvals surface
+  through Deepy's audit flow. Do not rerun a blocked tester command through raw
+  `shell`; keep verification inside the constrained `test_shell` path.
 Tool protocol:
 Tool results are JSON strings: ok, name, output, error, metadata, awaitUserResponse.

{deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/sessions/session.py RENAMED Viewed

@@ -454,6 +454,14 @@ class DeepySession:
         increment_cache_generation: bool = False,
         reset_cache_generation: bool = False,
     ) -> None:
+        clear_usage_state = (
+            active_tokens is not None
+            and usage is None
+            and latest_context_window_tokens is None
+            and last_usage_tokens is None
+            and last_usage_record_count is None
+            and cache_usage is None
+        )
         with self._transaction() as conn:
             self._update_session_metadata(
                 conn,
@@ -475,6 +483,19 @@ class DeepySession:
                 increment_cache_generation=increment_cache_generation,
                 reset_cache_generation=reset_cache_generation,
             )
+            if clear_usage_state:
+                conn.execute(
+                    """
+                    update sessions
+                    set usage_json = null,
+                        latest_context_window_tokens = null,
+                        last_usage_tokens = null,
+                        last_usage_record_count = null,
+                        cache_usage_json = null
+                    where id = ?
+                    """,
+                    (self.session_id,),
+                )
     @contextmanager
     def _transaction(self) -> Iterator[sqlite3.Connection]:

{deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/sessions/store_helpers.py RENAMED Viewed

@@ -5,6 +5,7 @@ from typing import Any
 from deepy.todos import todo_state_from_tool_output
 from deepy.utils import json as json_utils
+from deepy.llm.multimodal import redacted_content_text
 CONTEXT_UNDERCOUNT_REPAIR_RATIO = 2
 CONTEXT_UNDERCOUNT_REPAIR_MIN_DELTA = 128
@@ -173,16 +174,8 @@ def session_status(items: list[dict[str, Any]]) -> str:
 def item_text(item: dict[str, Any]) -> str:
     for key in ("content", "text", "output"):
         value = item.get(key)
-        if isinstance(value, str):
-            return value
-        if isinstance(value, list):
-            parts: list[str] = []
-            for part in value:
-                if isinstance(part, dict):
-                    text = part.get("text") or part.get("input_text")
-                    if isinstance(text, str):
-                        parts.append(text)
-            return "".join(parts)
+        if value is not None:
+            return redacted_content_text(value)
     return ""

deepy-cli 0.2.25__tar.gz → 0.2.26__tar.gz

deepy-cli 0.2.25__tar.gz → 0.2.26__tar.gz