deepy-cli 0.2.25__tar.gz → 0.2.26__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/PKG-INFO +1 -1
  2. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/pyproject.toml +1 -1
  3. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/__init__.py +1 -1
  4. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/cli.py +2 -1
  5. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/config/settings.py +3 -0
  6. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/tools/test_shell.md +5 -3
  7. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/llm/compaction.py +1 -1
  8. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/llm/context.py +26 -1
  9. deepy_cli-0.2.26/src/deepy/llm/multimodal.py +279 -0
  10. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/llm/provider.py +18 -1
  11. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/llm/replay.py +11 -1
  12. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/llm/runner.py +12 -3
  13. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/prompts/system.py +3 -4
  14. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/sessions/session.py +21 -0
  15. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/sessions/store_helpers.py +3 -10
  16. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tools/agents.py +38 -1
  17. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tools/builtin.py +2 -0
  18. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tools/test_shell.py +21 -6
  19. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tui/app.py +60 -14
  20. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tui/screens.py +8 -1
  21. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tui/widgets.py +109 -4
  22. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/audit_approval_panel.py +8 -2
  23. deepy_cli-0.2.26/src/deepy/ui/image_input.py +527 -0
  24. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/prompt_input.py +102 -3
  25. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/terminal.py +45 -12
  26. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/README.md +0 -0
  27. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/__main__.py +0 -0
  28. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/audit.py +0 -0
  29. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/background_tasks.py +0 -0
  30. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/config/__init__.py +0 -0
  31. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/__init__.py +0 -0
  32. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/skills/skill-creator/SKILL.md +0 -0
  33. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/skills/skill-installer/SKILL.md +0 -0
  34. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/tools/AskUserQuestion.md +0 -0
  35. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/tools/Read.md +0 -0
  36. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/tools/Search.md +0 -0
  37. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/tools/Update.md +0 -0
  38. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/tools/WebFetch.md +0 -0
  39. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/tools/WebSearch.md +0 -0
  40. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/tools/Write.md +0 -0
  41. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/tools/__init__.py +0 -0
  42. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/tools/shell.md +0 -0
  43. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/tools/task_list.md +0 -0
  44. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/tools/task_output.md +0 -0
  45. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/tools/task_stop.md +0 -0
  46. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/data/tools/todo_write.md +0 -0
  47. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/errors.py +0 -0
  48. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/input_suggestions.py +0 -0
  49. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/llm/__init__.py +0 -0
  50. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/llm/agent.py +0 -0
  51. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/llm/cache_context.py +0 -0
  52. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/llm/events.py +0 -0
  53. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/llm/model_capabilities.py +0 -0
  54. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/llm/thinking.py +0 -0
  55. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/mcp.py +0 -0
  56. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/prompts/__init__.py +0 -0
  57. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/prompts/compact.py +0 -0
  58. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/prompts/init_agents.py +0 -0
  59. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/prompts/rules.py +0 -0
  60. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/prompts/runtime_context.py +0 -0
  61. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/prompts/tool_docs.py +0 -0
  62. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/session_cost.py +0 -0
  63. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/sessions/__init__.py +0 -0
  64. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/sessions/index.py +0 -0
  65. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/sessions/manager.py +0 -0
  66. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/skill_market.py +0 -0
  67. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/skills.py +0 -0
  68. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/status.py +0 -0
  69. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/subagents.py +0 -0
  70. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/todos.py +0 -0
  71. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tools/__init__.py +0 -0
  72. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tools/file_state.py +0 -0
  73. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tools/result.py +0 -0
  74. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tools/search.py +0 -0
  75. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tools/shell_output.py +0 -0
  76. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tools/shell_utils.py +0 -0
  77. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tui/__init__.py +0 -0
  78. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tui/commands.py +0 -0
  79. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tui/compat.py +0 -0
  80. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tui/diff.py +0 -0
  81. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tui/runner.py +0 -0
  82. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/tui/state.py +0 -0
  83. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/types/__init__.py +0 -0
  84. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/types/sdk.py +0 -0
  85. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/types/tool_payloads.py +0 -0
  86. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/__init__.py +0 -0
  87. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/app.py +0 -0
  88. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/ask_user_question.py +0 -0
  89. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/audit_approval_picker.py +0 -0
  90. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/exit_summary.py +0 -0
  91. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/file_mentions.py +0 -0
  92. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/loading_text.py +0 -0
  93. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/local_command.py +0 -0
  94. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/markdown.py +0 -0
  95. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/message_view.py +0 -0
  96. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/model_picker.py +0 -0
  97. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/prompt_buffer.py +0 -0
  98. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/session_list.py +0 -0
  99. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/session_picker.py +0 -0
  100. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/skill_picker.py +0 -0
  101. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/slash_commands.py +0 -0
  102. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/status_footer.py +0 -0
  103. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/styles.py +0 -0
  104. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/theme_picker.py +0 -0
  105. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/thinking_state.py +0 -0
  106. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/ui/welcome.py +0 -0
  107. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/update_check.py +0 -0
  108. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/usage.py +0 -0
  109. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/utils/__init__.py +0 -0
  110. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/utils/debug_logger.py +0 -0
  111. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/utils/error_logger.py +0 -0
  112. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/utils/json.py +0 -0
  113. {deepy_cli-0.2.25 → deepy_cli-0.2.26}/src/deepy/utils/notify.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: deepy-cli
3
- Version: 0.2.25
3
+ Version: 0.2.26
4
4
  Summary: Deepy - Vibe coding for DeepSeek models in your terminal
5
5
  Keywords: deepseek,coding-agent,terminal,cli,agents
6
6
  Author: kirineko
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "deepy-cli"
3
- version = "0.2.25"
3
+ version = "0.2.26"
4
4
  description = "Deepy - Vibe coding for DeepSeek models in your terminal"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- __version__ = "0.2.25"
3
+ __version__ = "0.2.26"
4
4
 
5
5
 
6
6
  def main() -> None:
@@ -30,6 +30,7 @@ from .config import (
30
30
  from .config.settings import DEFAULT_UI_THEME, UI_THEMES
31
31
  from .errors import format_error_display
32
32
  from .llm.cache_context import format_cache_usage
33
+ from .llm.multimodal import redact_image_data_urls
33
34
  from .llm.provider import build_provider_bundle
34
35
  from .llm.runner import DEFAULT_MAX_TURNS, run_prompt_once
35
36
  from .sessions import DeepySession, list_session_entries
@@ -614,7 +615,7 @@ def _cmd_sessions(args: argparse.Namespace) -> int:
614
615
  else 0,
615
616
  "cache_break_reason": entry.cache_break_reason if entry is not None else None,
616
617
  "cache_usage": entry.cache_usage if entry is not None else None,
617
- "items": items,
618
+ "items": redact_image_data_urls(items),
618
619
  }
619
620
  )
620
621
  )
@@ -59,6 +59,7 @@ class ModelInfo:
59
59
  label: str
60
60
  description: str
61
61
  supports_thinking: bool = True
62
+ supports_image_input: bool = False
62
63
  default_reasoning_mode: str = "max"
63
64
 
64
65
 
@@ -102,6 +103,7 @@ OPENROUTER_MODEL_CATALOG = (
102
103
  name="xiaomi/mimo-v2.5",
103
104
  label="MiMo V2.5",
104
105
  description="Xiaomi MiMo V2.5 via OpenRouter.",
106
+ supports_image_input=True,
105
107
  default_reasoning_mode="enabled",
106
108
  ),
107
109
  )
@@ -116,6 +118,7 @@ XIAOMI_MODEL_CATALOG = (
116
118
  name="mimo-v2.5",
117
119
  label="MiMo V2.5",
118
120
  description="Xiaomi official MiMo V2.5.",
121
+ supports_image_input=True,
119
122
  default_reasoning_mode="enabled",
120
123
  ),
121
124
  )
@@ -11,6 +11,8 @@ redirection, command substitution, heredocs, and background operators.
11
11
 
12
12
  Low-risk verification commands run immediately and return command, cwd,
13
13
  exit-code, elapsed time, stdout, stderr, and truncation metadata. Medium-risk
14
- commands return `approval_required` with an `approvalToken`; the main Deepy
15
- agent must ask the user before retrying the same command with that token.
16
- Destructive, publishing, mutating, or unsupported commands are denied.
14
+ commands are routed through Deepy's outer audit approval flow when an audit
15
+ policy is active; after approval they still execute through this constrained
16
+ tool. Without an active audit policy, medium-risk commands return
17
+ `approval_required` with an `approvalToken` for same-command retry. Destructive,
18
+ publishing, mutating, or unsupported commands are denied.
@@ -132,7 +132,7 @@ async def ensure_context_ready(
132
132
  prefix_snapshot: CachePrefixSnapshot | None = None,
133
133
  prefix_tools: list[Any] | None = None,
134
134
  prefix_mcp_servers: list[Any] | None = None,
135
- additional_input: str | None = None,
135
+ additional_input: Any | None = None,
136
136
  ) -> ContextReadiness:
137
137
  additional_tokens = estimate_tokens_for_item(additional_input or "")
138
138
  state = session.context_token_state()
@@ -4,6 +4,8 @@ from math import ceil
4
4
  from typing import Any
5
5
 
6
6
  from deepy.config import Settings
7
+ from deepy.llm.multimodal import item_contains_image_content, strip_image_content_from_items
8
+ from deepy.llm.multimodal import supports_image_input
7
9
  from deepy.types.sdk import SessionInputCallback
8
10
  from deepy.utils import json as json_utils
9
11
 
@@ -28,6 +30,8 @@ def estimate_tokens_for_text(text: str) -> int:
28
30
 
29
31
 
30
32
  def estimate_tokens_for_item(item: Any) -> int:
33
+ if item_contains_image_content(item):
34
+ return _estimate_multimodal_item_tokens(item)
31
35
  if isinstance(item, str):
32
36
  return estimate_tokens_for_text(item)
33
37
  if isinstance(item, dict):
@@ -37,13 +41,34 @@ def estimate_tokens_for_item(item: Any) -> int:
37
41
  return estimate_tokens_for_text(str(item))
38
42
 
39
43
 
44
def _estimate_multimodal_item_tokens(item: Any) -> int:
    """Estimate the token cost of an item whose content includes image parts.

    Image parts are charged a flat per-image budget instead of being measured
    by the length of their (base64) payload; every other part is estimated
    through ``estimate_tokens_for_item``.

    Args:
        item: A conversation item. Only ``dict`` items are inspected part by
            part; anything else is estimated from its ``str()`` form.

    Returns:
        The estimated token count, never less than 1 for a part-wise estimate.
    """
    # Flat per-image budget used instead of the size of the encoded payload.
    image_token_estimate = 1024
    image_part_types = {"input_image", "image", "image_url"}

    if not isinstance(item, dict):
        return estimate_tokens_for_text(str(item))
    content = item.get("content")
    if isinstance(content, dict):
        # A lone content part is also recognised as image content by the
        # detection helper; treat it as a one-element list so the base64 data
        # URL is not serialised and counted as ordinary text.
        content = [content]
    if not isinstance(content, list):
        return estimate_tokens_for_text(json_utils.dumps(item))

    tokens = 0
    for part in content:
        is_image = isinstance(part, dict) and (
            part.get("type") in image_part_types or "image_url" in part
        )
        if is_image:
            tokens += image_token_estimate
        else:
            tokens += estimate_tokens_for_item(part)
    return max(tokens, 1)
60
+
61
+
40
62
  def estimate_tokens_for_items(items: list[dict[str, Any]]) -> int:
41
63
  return sum(estimate_tokens_for_item(item) for item in items)
42
64
 
43
65
 
44
66
  def build_session_input_callback(settings: Settings) -> SessionInputCallback:
45
67
  def callback(history: list[Any], new_input: list[Any]) -> list[Any]:
46
- return [*history, *new_input]
68
+ items = [*history, *new_input]
69
+ if not supports_image_input(settings):
70
+ return strip_image_content_from_items(items)
71
+ return items
47
72
 
48
73
  return callback
49
74
 
@@ -0,0 +1,279 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import re
5
+ from dataclasses import dataclass
6
+ from typing import Any
7
+
8
+ from deepy.config import Settings
9
+
10
+
11
# MIME types accepted for image attachments.
SUPPORTED_IMAGE_MIME_TYPES = frozenset(
    ("image/png", "image/jpeg", "image/webp", "image/gif")
)
# Default upper bound on the raw (pre-base64) image size: 50 MiB.
DEFAULT_MAX_IMAGE_BYTES = 50 * 1024 * 1024
# User-facing notice shown when the active model cannot take image input.
UNSUPPORTED_IMAGE_INPUT_MESSAGE = "当前模型不支持图片输入,已忽略粘贴的图片。"
# Text inserted ahead of an image-only message so the request carries a prompt.
IMAGE_ONLY_DEFAULT_TEXT = "请描述这张图片的内容,不要执行工具或修改文件。"
# Matches the leading "data:image/<subtype>;base64," prefix of a data URL.
IMAGE_DATA_URL_RE = re.compile(
    r"^data:image/[a-zA-Z0-9.+-]+;base64,",
    re.IGNORECASE,
)
23
+
24
+
25
class ImageAttachmentError(ValueError):
    """Raised when an image attachment fails validation."""
27
+
28
+
29
class UnsupportedImageInputError(RuntimeError):
    """Error type for image input that the current model cannot accept."""
31
+
32
+
33
@dataclass(frozen=True)
class PromptImageAttachment:
    """Immutable description of one image attached to a user prompt."""

    # Short human-readable name of the attachment.
    label: str
    # MIME type of the encoded image.
    mime_type: str
    # Image bytes encoded as base64 text.
    data_base64: str
    # Size of the image in bytes, as supplied by the creator of the record.
    byte_size: int
    # Where the image came from; defaults to the clipboard.
    source: str = "clipboard"
    # Optional reference string; not interpreted by this class.
    data_ref: str | None = None

    @property
    def display_label(self) -> str:
        """Return the label wrapped in square brackets."""
        return "[{}]".format(self.label)

    @property
    def data_url(self) -> str:
        """Return the image as a ``data:<mime>;base64,<payload>`` URL."""
        return "data:{};base64,{}".format(self.mime_type, self.data_base64)

    def to_input_image_block(self) -> dict[str, str]:
        """Return an ``input_image`` content block carrying the data URL."""
        block = {"type": "input_image"}
        block["image_url"] = self.data_url
        return block
52
+
53
+
54
def supports_image_input(settings: Settings) -> bool:
    """Report whether the model configured in *settings* accepts image input."""
    provider = settings.model.provider
    model_name = settings.model.name
    return model_supports_image_input(provider, model_name)
56
+
57
+
58
def model_supports_image_input(provider: str, model: str) -> bool:
    """Return True when the provider/model pair is known to accept images.

    Both arguments are compared case-insensitively with surrounding
    whitespace ignored. Unknown providers never support image input.
    """
    provider_key = provider.strip().lower()
    model_key = model.strip().lower()
    # Provider -> the single model name that accepts image input there.
    image_capable_model = {
        "xiaomi": "mimo-v2.5",
        "openrouter": "xiaomi/mimo-v2.5",
    }
    expected = image_capable_model.get(provider_key)
    return expected is not None and model_key == expected
66
+
67
+
68
def validate_image_attachment(
    *,
    mime_type: str,
    byte_size: int,
    max_bytes: int = DEFAULT_MAX_IMAGE_BYTES,
) -> None:
    """Check an image's MIME type and size, raising on the first problem.

    Args:
        mime_type: MIME type of the image; compared case-insensitively with
            surrounding whitespace ignored.
        byte_size: Size of the image in bytes.
        max_bytes: Largest accepted size in bytes.

    Raises:
        ImageAttachmentError: If the MIME type is unsupported, the image is
            empty, or the image exceeds ``max_bytes`` (checked in that order).
    """
    problem: str | None = None
    if mime_type.strip().lower() not in SUPPORTED_IMAGE_MIME_TYPES:
        problem = f"不支持的图片格式:{mime_type or 'unknown'}"
    elif byte_size <= 0:
        problem = "图片为空,已忽略粘贴的图片。"
    elif byte_size > max_bytes:
        problem = "图片过大,已忽略粘贴的图片。"
    if problem is not None:
        raise ImageAttachmentError(problem)
81
+
82
+
83
def build_prompt_image_attachment(
    *,
    data: bytes,
    mime_type: str,
    index: int,
    source: str = "clipboard",
    max_bytes: int = DEFAULT_MAX_IMAGE_BYTES,
) -> PromptImageAttachment:
    """Validate raw image bytes and wrap them in a ``PromptImageAttachment``.

    Args:
        data: Raw image bytes.
        mime_type: MIME type of the image; stored lower-cased and stripped.
        index: Number used to build the attachment label.
        source: Origin of the image.
        max_bytes: Largest accepted size in bytes.

    Returns:
        The attachment holding the base64-encoded image.

    Raises:
        ImageAttachmentError: Propagated from ``validate_image_attachment``.
    """
    clean_mime = mime_type.strip().lower()
    size = len(data)
    validate_image_attachment(mime_type=clean_mime, byte_size=size, max_bytes=max_bytes)
    encoded = base64.b64encode(data).decode("ascii")
    return PromptImageAttachment(
        label=f"图片{index}",
        mime_type=clean_mime,
        data_base64=encoded,
        byte_size=size,
        source=source,
    )
104
+
105
+
106
def image_attachment_labels(attachments: list[PromptImageAttachment]) -> str:
    """Join the display labels of *attachments* with single spaces."""
    labels = [entry.display_label for entry in attachments]
    return " ".join(labels)
108
+
109
+
110
def format_user_prompt_display(prompt: str, attachments: list[PromptImageAttachment]) -> str:
    """Build the text shown for a user turn: prompt first, image labels below.

    When either the stripped prompt or the labels are empty, the other is
    returned on its own.
    """
    tags = image_attachment_labels(attachments)
    body = prompt.strip()
    if not body:
        return tags
    if not tags:
        return body
    return f"{body}\n{tags}"
116
+
117
+
118
def build_user_input(
    prompt: str,
    attachments: list[PromptImageAttachment] | None = None,
) -> str | list[dict[str, Any]]:
    """Build the run input for a prompt with optional image attachments.

    Returns:
        The prompt string unchanged when there are no attachments; otherwise a
        one-element list holding a user message whose content is the prompt
        text block (omitted when the prompt is blank) followed by one
        ``input_image`` block per attachment.
    """
    images = list(attachments) if attachments else []
    if not images:
        return prompt
    text_blocks: list[dict[str, str]] = (
        [{"type": "input_text", "text": prompt}] if prompt.strip() else []
    )
    image_blocks = [image.to_input_image_block() for image in images]
    return [{"role": "user", "content": text_blocks + image_blocks}]
130
+
131
+
132
def item_contains_image_content(item: Any) -> bool:
    """Return True when the item's ``content`` holds at least one image part.

    ``content`` is read by key for dicts and by attribute for any other
    object; a missing attribute counts as no content.
    """
    content = item.get("content") if isinstance(item, dict) else getattr(item, "content", None)
    return _content_contains_image(content)
136
+
137
+
138
def items_contain_image_content(items: list[Any]) -> bool:
    """Return True as soon as any item in *items* carries image content."""
    for entry in items:
        if item_contains_image_content(entry):
            return True
    return False
140
+
141
+
142
def strip_image_content_from_items(items: list[Any]) -> list[Any]:
    """Return a new list with image parts removed from every item.

    Items for which ``strip_image_content_from_item`` returns ``None`` are
    left out of the result.
    """
    return [
        cleaned
        for entry in items
        if (cleaned := strip_image_content_from_item(entry)) is not None
    ]
149
+
150
+
151
def strip_image_content_from_item(item: Any) -> Any | None:
    """Return *item* without image parts, or ``None`` if only images remained.

    Args:
        item: A conversation item. Non-dict items and dicts without a
            ``content`` key are returned unchanged.

    Returns:
        A shallow copy of the item with image parts removed from ``content``,
        or ``None`` when the item carried image content and nothing else.
        Items whose content was already empty (``None`` or ``[]``) and held no
        image are kept, so unrelated messages are never dropped from history.
    """
    if not isinstance(item, dict) or "content" not in item:
        return item
    original_content = item.get("content")
    content = _strip_image_content(original_content)
    if content is None:
        if _content_contains_image(original_content):
            # The message consisted solely of images; nothing left to send.
            return None
        # Content was empty before stripping (e.g. ``None`` or ``[]``): this is
        # not an image message, so preserve it as it was.
        content = original_content
    cleaned = dict(item)
    cleaned["content"] = content
    return cleaned
160
+
161
+
162
def redacted_content_text(value: Any) -> str:
    """Render message content as plain text with images shown as placeholders.

    Strings are passed through the data-URL redactor. In a list of parts,
    image parts become numbered ``[图片N]`` placeholders, text parts are
    redacted, non-dict parts are skipped, and the pieces are joined with
    newlines. A single dict part is rendered the same way. ``None`` gives an
    empty string and any other value is redacted from its ``str()`` form.
    """
    if value is None:
        return ""
    if isinstance(value, str):
        return _redact_data_urls(value)
    if isinstance(value, dict):
        if _part_is_image(value):
            return "[图片1]"
        single_text = _text_part(value)
        return _redact_data_urls(single_text) if single_text else ""
    if not isinstance(value, list):
        return _redact_data_urls(str(value))

    rendered: list[str] = []
    images_seen = 0
    for entry in value:
        if not isinstance(entry, dict):
            continue
        if _part_is_image(entry):
            images_seen += 1
            rendered.append(f"[图片{images_seen}]")
        elif fragment := _text_part(entry):
            rendered.append(_redact_data_urls(fragment))
    return "\n".join(rendered)
185
+
186
+
187
def redact_image_data_urls(value: Any) -> Any:
    """Return a copy of *value* with image URLs replaced by placeholders.

    Strings go through the data-URL redactor, lists and dicts are rebuilt
    recursively, and any other value is returned as is. For a dict recognised
    as an image part, a string ``image_url`` becomes ``"[图片]"`` and a dict
    ``image_url`` has its ``url`` entry set to ``"[图片]"``.
    """
    if isinstance(value, str):
        return _redact_data_urls(value)
    if isinstance(value, list):
        return [redact_image_data_urls(entry) for entry in value]
    if not isinstance(value, dict):
        return value

    scrubbed: dict[Any, Any] = {}
    for key, entry in value.items():
        scrubbed[key] = redact_image_data_urls(entry)
    if not _part_is_image(scrubbed):
        return scrubbed

    target = scrubbed.get("image_url")
    if isinstance(target, str):
        scrubbed["image_url"] = "[图片]"
    elif isinstance(target, dict):
        scrubbed["image_url"] = {**target, "url": "[图片]"}
    return scrubbed
203
+
204
+
205
def normalize_multimodal_content_blocks(content: Any) -> Any:
    """Convert ``input_text``/``input_image`` parts to ``text``/``image_url``.

    Non-list content is returned untouched. Within a list, ``input_text``
    parts become ``{"type": "text", ...}``, ``input_image`` parts become
    ``{"type": "image_url", "image_url": {"url": ...}}``, and every other part
    is kept as is. When the result holds an image but no non-blank text, a
    default text block is inserted at the front.
    """
    if not isinstance(content, list):
        return content

    blocks: list[Any] = []
    saw_text = False
    saw_image = False
    for part in content:
        if not isinstance(part, dict):
            blocks.append(part)
            continue
        kind = part.get("type")
        if kind == "input_text":
            raw = part.get("text")
            if raw is None:
                raw = part.get("input_text")
            # Non-string text values are normalised to an empty string.
            text_value = raw if isinstance(raw, str) else ""
            if text_value.strip():
                saw_text = True
            blocks.append({"type": "text", "text": text_value})
        elif kind == "input_image":
            url = part.get("image_url")
            saw_image = True
            blocks.append(
                {
                    "type": "image_url",
                    "image_url": {"url": url if isinstance(url, str) else ""},
                }
            )
        else:
            # Already-normalised or unknown part: keep it, but record whether
            # it contributes an image or some non-blank text.
            if _part_is_image(part):
                saw_image = True
            elif _text_part(part).strip():
                saw_text = True
            blocks.append(part)

    if saw_image and not saw_text:
        blocks.insert(0, {"type": "text", "text": IMAGE_ONLY_DEFAULT_TEXT})
    return blocks
244
+
245
+
246
def _content_contains_image(content: Any) -> bool:
    """Return True if *content* is an image part or a list containing one."""
    if isinstance(content, dict):
        return _part_is_image(content)
    if not isinstance(content, list):
        return False
    for part in content:
        if isinstance(part, dict) and _part_is_image(part):
            return True
    return False
250
+
251
+
252
def _strip_image_content(content: Any) -> Any | None:
    """Remove image parts from *content*.

    Returns the remaining parts for list content (``None`` when none remain),
    ``None`` for a single image part, and any other content unchanged.
    """
    if isinstance(content, list):
        kept = []
        for part in content:
            if isinstance(part, dict) and _part_is_image(part):
                continue
            kept.append(part)
        return kept if kept else None
    if isinstance(content, dict) and _part_is_image(content):
        return None
    return content
263
+
264
+
265
def _part_is_image(part: dict[str, Any]) -> bool:
    """Return True when *part* is typed as an image or has an ``image_url`` key."""
    image_kinds = {"input_image", "image", "image_url"}
    if part.get("type") in image_kinds:
        return True
    return "image_url" in part
268
+
269
+
270
def _text_part(part: dict[str, Any]) -> str:
    """Return the first string found under the known text keys, else ``""``.

    Keys are tried in the order ``text``, ``input_text``, ``output_text``,
    ``refusal``; non-string values are skipped.
    """
    text_keys = ("text", "input_text", "output_text", "refusal")
    candidates = (part.get(key) for key in text_keys)
    return next((value for value in candidates if isinstance(value, str)), "")
276
+
277
+
278
# Matches a complete base64 image data URL -- prefix AND payload -- anywhere in
# a string. The payload class covers both the standard and URL-safe alphabets.
_IMAGE_DATA_URL_WITH_PAYLOAD_RE = re.compile(
    r"data:image/[a-zA-Z0-9.+-]+;base64,[A-Za-z0-9+/=_-]*",
    re.IGNORECASE,
)


def _redact_data_urls(text: str) -> str:
    """Replace every base64 image data URL in *text* with a short placeholder.

    The whole URL, including its encoded payload, is replaced by
    ``data:image/...;base64,``. Matching only the prefix would leave the image
    bytes in the "redacted" output, so the payload is consumed as well, and
    URLs embedded in the middle of the text are handled too.

    Args:
        text: Text that may contain image data URLs.

    Returns:
        The text with each image data URL collapsed to the placeholder.
    """
    return _IMAGE_DATA_URL_WITH_PAYLOAD_RE.sub("data:image/...;base64,", text)
@@ -8,8 +8,14 @@ from agents import Model, ModelSettings
8
8
  from agents import OpenAIChatCompletionsModel
9
9
 
10
10
  from deepy.config import Settings
11
+ from deepy.config.settings import infer_provider_from_base_url
11
12
 
12
13
  from .cache_context import capture_sdk_request_shape
14
+ from .multimodal import (
15
+ items_contain_image_content,
16
+ model_supports_image_input,
17
+ strip_image_content_from_items,
18
+ )
13
19
  from .replay import (
14
20
  sanitize_chat_completion_stream_event,
15
21
  sanitize_model_input_for_chat_completions,
@@ -43,10 +49,21 @@ class DeepyOpenAIChatCompletionsModel(OpenAIChatCompletionsModel):
43
49
  *args: Any,
44
50
  **kwargs: Any,
45
51
  ) -> Any:
52
+ model_name = str(getattr(self, "model", ""))
53
+ base_url = str(getattr(self._get_client(), "base_url", "") or "")
54
+ inferred_provider = infer_provider_from_base_url(base_url) or (
55
+ "openrouter" if _is_openrouter_base_url(base_url) else ""
56
+ )
57
+ if (
58
+ isinstance(input, list)
59
+ and items_contain_image_content(input)
60
+ and not model_supports_image_input(inferred_provider, model_name)
61
+ ):
62
+ input = strip_image_content_from_items(input)
46
63
  capture_sdk_request_shape(
47
64
  system_instructions=system_instructions,
48
65
  input=input,
49
- model=str(getattr(self, "model", "")),
66
+ model=model_name,
50
67
  model_settings=args[0] if args else None,
51
68
  tools=args[1] if len(args) > 1 and isinstance(args[1], list) else None,
52
69
  mcp_servers=None,
@@ -4,6 +4,8 @@ from collections import Counter
4
4
  from collections.abc import Iterable
5
5
  from typing import Any, cast
6
6
 
7
+ from deepy.llm.multimodal import normalize_multimodal_content_blocks
8
+
7
9
 
8
10
  def sanitize_model_input_for_chat_completions(input_value: Any) -> Any:
9
11
  if not isinstance(input_value, list):
@@ -92,10 +94,18 @@ def _normalize_chat_tool_items(items: Iterable[Any]) -> list[Any]:
92
94
  }
93
95
  )
94
96
  continue
95
- normalized.append(item)
97
+ normalized.append(_normalize_multimodal_item(item))
96
98
  return normalized
97
99
 
98
100
 
101
def _normalize_multimodal_item(item: Any) -> Any:
    """Return *item* with its ``content`` blocks normalised.

    Dict items that have a ``content`` key are shallow-copied with the content
    passed through ``normalize_multimodal_content_blocks``; anything else is
    returned unchanged.
    """
    if not (isinstance(item, dict) and "content" in item):
        return item
    return {**item, "content": normalize_multimodal_content_blocks(item.get("content"))}
107
+
108
+
99
109
  def sanitize_chat_completion_stream_event(event: Any) -> Any | None:
100
110
  if getattr(event, "type", None) == "response.output_item.done" and _is_empty_assistant_message(
101
111
  getattr(event, "item", None)
@@ -30,6 +30,11 @@ from .cache_context import (
30
30
  from .compaction import ContextCompactionError, ensure_context_ready
31
31
  from .context import build_session_input_callback
32
32
  from .events import DeepyStreamEvent, normalize_stream_event
33
+ from .multimodal import (
34
+ PromptImageAttachment,
35
+ build_user_input,
36
+ supports_image_input,
37
+ )
33
38
  from .provider import ProviderBundle, build_provider_bundle
34
39
 
35
40
  DEFAULT_MAX_TURNS = 100
@@ -73,6 +78,7 @@ async def run_prompt_once(
73
78
  list[ApprovalDecision] | Awaitable[list[ApprovalDecision]],
74
79
  ]
75
80
  | None = None,
81
+ image_attachments: list[PromptImageAttachment] | None = None,
76
82
  ) -> RunSummary:
77
83
  from agents import RunConfig, Runner
78
84
  from agents.exceptions import MaxTurnsExceeded, ModelBehaviorError
@@ -86,6 +92,9 @@ async def run_prompt_once(
86
92
  )
87
93
  audit_policy = AuditPolicy(lambda: audit_state.mode, resolved_settings.audit)
88
94
  session = DeepySession.open(root, session_id) if session_id else DeepySession.create(root)
95
+ effective_image_attachments = (
96
+ list(image_attachments or []) if supports_image_input(resolved_settings) else []
97
+ )
89
98
  initial_todos, _ = normalize_todo_items(session.todo_state())
90
99
  runtime = ToolRuntime(
91
100
  cwd=root,
@@ -134,7 +143,7 @@ async def run_prompt_once(
134
143
  prefix_snapshot=prefix_snapshot,
135
144
  prefix_tools=list(getattr(agent, "tools", []) or []),
136
145
  prefix_mcp_servers=list(getattr(agent, "mcp_servers", []) or []),
137
- additional_input=prompt,
146
+ additional_input=build_user_input(prompt, effective_image_attachments),
138
147
  )
139
148
  except ContextCompactionError as exc:
140
149
  duration_ms = int((time.time() - started_at) * 1000) if "started_at" in locals() else 0
@@ -175,7 +184,7 @@ async def run_prompt_once(
175
184
  prefix_token: Any | None = None
176
185
  try:
177
186
  prefix_token = set_current_cache_prefix_snapshot(prefix_snapshot)
178
- run_input: Any = prompt
187
+ run_input: Any = build_user_input(prompt, effective_image_attachments)
179
188
  while True:
180
189
  result = Runner.run_streamed(
181
190
  agent,
@@ -527,7 +536,7 @@ def _approval_server_name(raw_item: Any, tool_name: str) -> str:
527
536
  def _approval_action_kind(tool_name: str) -> str:
528
537
  if tool_name in {"Write", "Update"}:
529
538
  return "text_write"
530
- if tool_name == "shell":
539
+ if tool_name in {"shell", "test_shell"}:
531
540
  return "command"
532
541
  if tool_name == "task_stop":
533
542
  return "background_task_control"
@@ -66,10 +66,9 @@ Core rules:
66
66
  `subagent_explore` for broad read-only investigation, `subagent_reviewer` for
67
67
  focused review, and `subagent_tester` for reproduction or verification. Keep
68
68
  Deepy responsible for final synthesis and do not delegate tiny one-step work.
69
- - If a subagent reports `test_shell` `approval_required`, ask the user through
70
- `AskUserQuestion` with the exact command, policy reason, and approval token.
71
- Retry only the same command through the constrained `test_shell` path after
72
- the user approves; do not broaden access to raw shell.
69
+ - If a subagent runs `test_shell`, let medium-risk command approvals surface
70
+ through Deepy's audit flow. Do not rerun a blocked tester command through raw
71
+ `shell`; keep verification inside the constrained `test_shell` path.
73
72
 
74
73
  Tool protocol:
75
74
  Tool results are JSON strings: ok, name, output, error, metadata, awaitUserResponse.
@@ -454,6 +454,14 @@ class DeepySession:
454
454
  increment_cache_generation: bool = False,
455
455
  reset_cache_generation: bool = False,
456
456
  ) -> None:
457
+ clear_usage_state = (
458
+ active_tokens is not None
459
+ and usage is None
460
+ and latest_context_window_tokens is None
461
+ and last_usage_tokens is None
462
+ and last_usage_record_count is None
463
+ and cache_usage is None
464
+ )
457
465
  with self._transaction() as conn:
458
466
  self._update_session_metadata(
459
467
  conn,
@@ -475,6 +483,19 @@ class DeepySession:
475
483
  increment_cache_generation=increment_cache_generation,
476
484
  reset_cache_generation=reset_cache_generation,
477
485
  )
486
+ if clear_usage_state:
487
+ conn.execute(
488
+ """
489
+ update sessions
490
+ set usage_json = null,
491
+ latest_context_window_tokens = null,
492
+ last_usage_tokens = null,
493
+ last_usage_record_count = null,
494
+ cache_usage_json = null
495
+ where id = ?
496
+ """,
497
+ (self.session_id,),
498
+ )
478
499
 
479
500
  @contextmanager
480
501
  def _transaction(self) -> Iterator[sqlite3.Connection]:
@@ -5,6 +5,7 @@ from typing import Any
5
5
 
6
6
  from deepy.todos import todo_state_from_tool_output
7
7
  from deepy.utils import json as json_utils
8
+ from deepy.llm.multimodal import redacted_content_text
8
9
 
9
10
  CONTEXT_UNDERCOUNT_REPAIR_RATIO = 2
10
11
  CONTEXT_UNDERCOUNT_REPAIR_MIN_DELTA = 128
@@ -173,16 +174,8 @@ def session_status(items: list[dict[str, Any]]) -> str:
173
174
  def item_text(item: dict[str, Any]) -> str:
174
175
  for key in ("content", "text", "output"):
175
176
  value = item.get(key)
176
- if isinstance(value, str):
177
- return value
178
- if isinstance(value, list):
179
- parts: list[str] = []
180
- for part in value:
181
- if isinstance(part, dict):
182
- text = part.get("text") or part.get("input_text")
183
- if isinstance(text, str):
184
- parts.append(text)
185
- return "".join(parts)
177
+ if value is not None:
178
+ return redacted_content_text(value)
186
179
  return ""
187
180
 
188
181