deepy-cli 0.2.24__tar.gz → 0.2.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/PKG-INFO +1 -1
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/pyproject.toml +1 -1
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/__init__.py +1 -1
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/cli.py +2 -1
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/config/settings.py +3 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/data/tools/test_shell.md +5 -3
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/llm/compaction.py +1 -1
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/llm/context.py +26 -1
- deepy_cli-0.2.26/src/deepy/llm/multimodal.py +279 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/llm/provider.py +18 -1
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/llm/replay.py +11 -1
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/llm/runner.py +12 -3
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/prompts/system.py +3 -4
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/sessions/session.py +21 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/sessions/store_helpers.py +3 -10
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tools/agents.py +67 -4
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tools/builtin.py +2 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tools/test_shell.py +21 -6
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tui/app.py +118 -15
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tui/screens.py +191 -2
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tui/widgets.py +151 -9
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/audit_approval_panel.py +8 -2
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/file_mentions.py +1 -2
- deepy_cli-0.2.26/src/deepy/ui/image_input.py +527 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/prompt_input.py +102 -3
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/terminal.py +50 -14
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/README.md +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/__main__.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/audit.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/background_tasks.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/config/__init__.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/data/__init__.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/data/skills/skill-creator/SKILL.md +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/data/skills/skill-installer/SKILL.md +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/data/tools/AskUserQuestion.md +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/data/tools/Read.md +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/data/tools/Search.md +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/data/tools/Update.md +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/data/tools/WebFetch.md +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/data/tools/WebSearch.md +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/data/tools/Write.md +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/data/tools/__init__.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/data/tools/shell.md +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/data/tools/task_list.md +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/data/tools/task_output.md +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/data/tools/task_stop.md +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/data/tools/todo_write.md +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/errors.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/input_suggestions.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/llm/__init__.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/llm/agent.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/llm/cache_context.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/llm/events.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/llm/model_capabilities.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/llm/thinking.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/mcp.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/prompts/__init__.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/prompts/compact.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/prompts/init_agents.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/prompts/rules.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/prompts/runtime_context.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/prompts/tool_docs.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/session_cost.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/sessions/__init__.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/sessions/index.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/sessions/manager.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/skill_market.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/skills.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/status.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/subagents.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/todos.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tools/__init__.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tools/file_state.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tools/result.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tools/search.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tools/shell_output.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tools/shell_utils.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tui/__init__.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tui/commands.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tui/compat.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tui/diff.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tui/runner.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/tui/state.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/types/__init__.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/types/sdk.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/types/tool_payloads.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/__init__.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/app.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/ask_user_question.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/audit_approval_picker.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/exit_summary.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/loading_text.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/local_command.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/markdown.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/message_view.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/model_picker.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/prompt_buffer.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/session_list.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/session_picker.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/skill_picker.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/slash_commands.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/status_footer.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/styles.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/theme_picker.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/thinking_state.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/ui/welcome.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/update_check.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/usage.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/utils/__init__.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/utils/debug_logger.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/utils/error_logger.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/utils/json.py +0 -0
- {deepy_cli-0.2.24 → deepy_cli-0.2.26}/src/deepy/utils/notify.py +0 -0
|
@@ -30,6 +30,7 @@ from .config import (
|
|
|
30
30
|
from .config.settings import DEFAULT_UI_THEME, UI_THEMES
|
|
31
31
|
from .errors import format_error_display
|
|
32
32
|
from .llm.cache_context import format_cache_usage
|
|
33
|
+
from .llm.multimodal import redact_image_data_urls
|
|
33
34
|
from .llm.provider import build_provider_bundle
|
|
34
35
|
from .llm.runner import DEFAULT_MAX_TURNS, run_prompt_once
|
|
35
36
|
from .sessions import DeepySession, list_session_entries
|
|
@@ -614,7 +615,7 @@ def _cmd_sessions(args: argparse.Namespace) -> int:
|
|
|
614
615
|
else 0,
|
|
615
616
|
"cache_break_reason": entry.cache_break_reason if entry is not None else None,
|
|
616
617
|
"cache_usage": entry.cache_usage if entry is not None else None,
|
|
617
|
-
"items": items,
|
|
618
|
+
"items": redact_image_data_urls(items),
|
|
618
619
|
}
|
|
619
620
|
)
|
|
620
621
|
)
|
|
@@ -59,6 +59,7 @@ class ModelInfo:
|
|
|
59
59
|
label: str
|
|
60
60
|
description: str
|
|
61
61
|
supports_thinking: bool = True
|
|
62
|
+
supports_image_input: bool = False
|
|
62
63
|
default_reasoning_mode: str = "max"
|
|
63
64
|
|
|
64
65
|
|
|
@@ -102,6 +103,7 @@ OPENROUTER_MODEL_CATALOG = (
|
|
|
102
103
|
name="xiaomi/mimo-v2.5",
|
|
103
104
|
label="MiMo V2.5",
|
|
104
105
|
description="Xiaomi MiMo V2.5 via OpenRouter.",
|
|
106
|
+
supports_image_input=True,
|
|
105
107
|
default_reasoning_mode="enabled",
|
|
106
108
|
),
|
|
107
109
|
)
|
|
@@ -116,6 +118,7 @@ XIAOMI_MODEL_CATALOG = (
|
|
|
116
118
|
name="mimo-v2.5",
|
|
117
119
|
label="MiMo V2.5",
|
|
118
120
|
description="Xiaomi official MiMo V2.5.",
|
|
121
|
+
supports_image_input=True,
|
|
119
122
|
default_reasoning_mode="enabled",
|
|
120
123
|
),
|
|
121
124
|
)
|
|
@@ -11,6 +11,8 @@ redirection, command substitution, heredocs, and background operators.
|
|
|
11
11
|
|
|
12
12
|
Low-risk verification commands run immediately and return command, cwd,
|
|
13
13
|
exit-code, elapsed time, stdout, stderr, and truncation metadata. Medium-risk
|
|
14
|
-
commands
|
|
15
|
-
|
|
16
|
-
|
|
14
|
+
commands are routed through Deepy's outer audit approval flow when an audit
|
|
15
|
+
policy is active; after approval they still execute through this constrained
|
|
16
|
+
tool. Without an active audit policy, medium-risk commands return
|
|
17
|
+
`approval_required` with an `approvalToken` for same-command retry. Destructive,
|
|
18
|
+
publishing, mutating, or unsupported commands are denied.
|
|
@@ -132,7 +132,7 @@ async def ensure_context_ready(
|
|
|
132
132
|
prefix_snapshot: CachePrefixSnapshot | None = None,
|
|
133
133
|
prefix_tools: list[Any] | None = None,
|
|
134
134
|
prefix_mcp_servers: list[Any] | None = None,
|
|
135
|
-
additional_input:
|
|
135
|
+
additional_input: Any | None = None,
|
|
136
136
|
) -> ContextReadiness:
|
|
137
137
|
additional_tokens = estimate_tokens_for_item(additional_input or "")
|
|
138
138
|
state = session.context_token_state()
|
|
@@ -4,6 +4,8 @@ from math import ceil
|
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
6
|
from deepy.config import Settings
|
|
7
|
+
from deepy.llm.multimodal import item_contains_image_content, strip_image_content_from_items
|
|
8
|
+
from deepy.llm.multimodal import supports_image_input
|
|
7
9
|
from deepy.types.sdk import SessionInputCallback
|
|
8
10
|
from deepy.utils import json as json_utils
|
|
9
11
|
|
|
@@ -28,6 +30,8 @@ def estimate_tokens_for_text(text: str) -> int:
|
|
|
28
30
|
|
|
29
31
|
|
|
30
32
|
def estimate_tokens_for_item(item: Any) -> int:
|
|
33
|
+
if item_contains_image_content(item):
|
|
34
|
+
return _estimate_multimodal_item_tokens(item)
|
|
31
35
|
if isinstance(item, str):
|
|
32
36
|
return estimate_tokens_for_text(item)
|
|
33
37
|
if isinstance(item, dict):
|
|
@@ -37,13 +41,34 @@ def estimate_tokens_for_item(item: Any) -> int:
|
|
|
37
41
|
return estimate_tokens_for_text(str(item))
|
|
38
42
|
|
|
39
43
|
|
|
44
|
+
def _estimate_multimodal_item_tokens(item: Any) -> int:
    """Estimate the token cost of an item whose content may include images.

    Non-dict items are costed from their ``str()`` form, and dict items whose
    ``content`` is not a list are costed from their JSON dump. For list
    content, every image part is charged a flat 1024 tokens and every other
    part is delegated to ``estimate_tokens_for_item``. The list-content
    result is never less than 1.
    """
    if not isinstance(item, dict):
        return estimate_tokens_for_text(str(item))

    parts = item.get("content")
    if not isinstance(parts, list):
        return estimate_tokens_for_text(json_utils.dumps(item))

    image_types = {"input_image", "image", "image_url"}

    def part_cost(part: Any) -> int:
        # Image parts get a fixed charge instead of being measured as text.
        looks_like_image = isinstance(part, dict) and (
            part.get("type") in image_types or "image_url" in part
        )
        return 1024 if looks_like_image else estimate_tokens_for_item(part)

    return max(sum(part_cost(part) for part in parts), 1)
|
|
60
|
+
|
|
61
|
+
|
|
40
62
|
def estimate_tokens_for_items(items: list[dict[str, Any]]) -> int:
|
|
41
63
|
return sum(estimate_tokens_for_item(item) for item in items)
|
|
42
64
|
|
|
43
65
|
|
|
44
66
|
def build_session_input_callback(settings: Settings) -> SessionInputCallback:
|
|
45
67
|
def callback(history: list[Any], new_input: list[Any]) -> list[Any]:
|
|
46
|
-
|
|
68
|
+
items = [*history, *new_input]
|
|
69
|
+
if not supports_image_input(settings):
|
|
70
|
+
return strip_image_content_from_items(items)
|
|
71
|
+
return items
|
|
47
72
|
|
|
48
73
|
return callback
|
|
49
74
|
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import re
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from deepy.config import Settings
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
SUPPORTED_IMAGE_MIME_TYPES = frozenset(
|
|
12
|
+
{
|
|
13
|
+
"image/png",
|
|
14
|
+
"image/jpeg",
|
|
15
|
+
"image/webp",
|
|
16
|
+
"image/gif",
|
|
17
|
+
}
|
|
18
|
+
)
|
|
19
|
+
DEFAULT_MAX_IMAGE_BYTES = 50 * 1024 * 1024
|
|
20
|
+
UNSUPPORTED_IMAGE_INPUT_MESSAGE = "当前模型不支持图片输入,已忽略粘贴的图片。"
|
|
21
|
+
IMAGE_ONLY_DEFAULT_TEXT = "请描述这张图片的内容,不要执行工具或修改文件。"
|
|
22
|
+
IMAGE_DATA_URL_RE = re.compile(r"^data:image/[a-zA-Z0-9.+-]+;base64,", re.IGNORECASE)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ImageAttachmentError(ValueError):
    """Raised when an image attachment fails validation."""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class UnsupportedImageInputError(RuntimeError):
    """Runtime error type for unsupported image input.

    NOTE(review): nothing in the visible part of this module raises it;
    confirm the raising call sites elsewhere in the package.
    """
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(frozen=True)
class PromptImageAttachment:
    """Immutable record describing one image attached to a prompt."""

    # Short name shown to the user; wrapped in brackets by ``display_label``.
    label: str
    # MIME type embedded in the generated data URL.
    mime_type: str
    # Base64 text of the image bytes, embedded in the generated data URL.
    data_base64: str
    # Size of the image in bytes, as recorded by whoever built the record.
    byte_size: int
    # Origin of the image; defaults to "clipboard".
    source: str = "clipboard"
    # Optional reference string. NOTE(review): not read in the visible code.
    data_ref: str | None = None

    @property
    def display_label(self) -> str:
        """Return the label wrapped in square brackets."""
        return "[{}]".format(self.label)

    @property
    def data_url(self) -> str:
        """Return the image as a ``data:<mime>;base64,<payload>`` URL."""
        return "data:{};base64,{}".format(self.mime_type, self.data_base64)

    def to_input_image_block(self) -> dict[str, str]:
        """Return an ``input_image`` content block carrying the data URL."""
        block: dict[str, str] = {"type": "input_image"}
        block["image_url"] = self.data_url
        return block
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def supports_image_input(settings: Settings) -> bool:
    """Report whether the model configured in ``settings`` accepts images.

    Delegates to ``model_supports_image_input`` with the provider and name
    read from ``settings.model``.
    """
    provider = settings.model.provider
    model_name = settings.model.name
    return model_supports_image_input(provider, model_name)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def model_supports_image_input(provider: str, model: str) -> bool:
    """Return True when the provider/model pair is known to accept images.

    Both arguments are compared after stripping whitespace and lowercasing.
    Only ``xiaomi`` + ``mimo-v2.5`` and ``openrouter`` + ``xiaomi/mimo-v2.5``
    qualify; every other pair returns False.
    """
    image_capable_model = {
        "xiaomi": "mimo-v2.5",
        "openrouter": "xiaomi/mimo-v2.5",
    }
    provider_key = provider.strip().lower()
    model_key = model.strip().lower()
    expected_model = image_capable_model.get(provider_key)
    return expected_model is not None and model_key == expected_model
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def validate_image_attachment(
    *,
    mime_type: str,
    byte_size: int,
    max_bytes: int = DEFAULT_MAX_IMAGE_BYTES,
) -> None:
    """Check an image's MIME type and size, raising on the first problem.

    Args:
        mime_type: MIME type; compared after stripping and lowercasing.
        byte_size: Size of the image in bytes.
        max_bytes: Largest accepted size in bytes.

    Raises:
        ImageAttachmentError: If the MIME type is not in
            ``SUPPORTED_IMAGE_MIME_TYPES``, if ``byte_size`` is zero or
            negative, or if it exceeds ``max_bytes`` (checked in that order).
    """
    problem: str | None = None
    if mime_type.strip().lower() not in SUPPORTED_IMAGE_MIME_TYPES:
        problem = f"不支持的图片格式:{mime_type or 'unknown'}"
    elif byte_size <= 0:
        problem = "图片为空,已忽略粘贴的图片。"
    elif byte_size > max_bytes:
        problem = "图片过大,已忽略粘贴的图片。"

    if problem is not None:
        raise ImageAttachmentError(problem)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def build_prompt_image_attachment(
    *,
    data: bytes,
    mime_type: str,
    index: int,
    source: str = "clipboard",
    max_bytes: int = DEFAULT_MAX_IMAGE_BYTES,
) -> PromptImageAttachment:
    """Validate raw image bytes and wrap them in a ``PromptImageAttachment``.

    Args:
        data: Raw image bytes; their length is the recorded byte size.
        mime_type: MIME type; stored stripped and lowercased.
        index: Number appended to the attachment label.
        source: Origin tag stored on the attachment.
        max_bytes: Size limit forwarded to ``validate_image_attachment``.

    Returns:
        The attachment with the bytes base64-encoded as ASCII text.

    Raises:
        ImageAttachmentError: Propagated from ``validate_image_attachment``.
    """
    mime = mime_type.strip().lower()
    size = len(data)
    validate_image_attachment(mime_type=mime, byte_size=size, max_bytes=max_bytes)

    encoded = base64.b64encode(data).decode("ascii")
    return PromptImageAttachment(
        label=f"图片{index}",
        mime_type=mime,
        data_base64=encoded,
        byte_size=size,
        source=source,
    )
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def image_attachment_labels(attachments: list[PromptImageAttachment]) -> str:
    """Join every attachment's ``display_label`` with single spaces."""
    labels = [item.display_label for item in attachments]
    return " ".join(labels)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def format_user_prompt_display(prompt: str, attachments: list[PromptImageAttachment]) -> str:
    """Build the display text for a prompt and its image labels.

    The stripped prompt comes first and the attachment labels follow on a new
    line. When either piece is empty, only the other one is returned.
    """
    tags = image_attachment_labels(attachments)
    body = prompt.strip()
    if not body:
        return tags
    if not tags:
        return body
    return f"{body}\n{tags}"
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def build_user_input(
    prompt: str,
    attachments: list[PromptImageAttachment] | None = None,
) -> str | list[dict[str, Any]]:
    """Build the run input for a prompt with optional image attachments.

    Without attachments the prompt string is returned unchanged. Otherwise a
    one-element list holding a ``user`` message is returned: its content is an
    ``input_text`` block (omitted when the prompt is blank) followed by one
    image block per attachment.
    """
    images = list(attachments or [])
    if not images:
        return prompt

    # The text block keeps the prompt exactly as given; only the blank check strips.
    text_blocks: list[dict[str, str]] = (
        [{"type": "input_text", "text": prompt}] if prompt.strip() else []
    )
    image_blocks = [image.to_input_image_block() for image in images]
    return [{"role": "user", "content": text_blocks + image_blocks}]
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def item_contains_image_content(item: Any) -> bool:
    """Report whether an item's content holds at least one image part.

    The content is read from the ``"content"`` key for dicts and from the
    ``content`` attribute (defaulting to None) for any other object.
    """
    if isinstance(item, dict):
        content = item.get("content")
    else:
        content = getattr(item, "content", None)
    return _content_contains_image(content)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def items_contain_image_content(items: list[Any]) -> bool:
    """Return True as soon as any item in ``items`` contains image content."""
    for candidate in items:
        if item_contains_image_content(candidate):
            return True
    return False
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def strip_image_content_from_items(items: list[Any]) -> list[Any]:
    """Return a new list with image parts removed from every item.

    Items for which ``strip_image_content_from_item`` returns None are left
    out of the result.
    """
    return [
        cleaned
        for entry in items
        if (cleaned := strip_image_content_from_item(entry)) is not None
    ]
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def strip_image_content_from_item(item: Any) -> Any | None:
    """Return the item with image parts removed from its content.

    Anything that is not a dict with a ``"content"`` key is returned as is.
    For such a dict, a shallow copy with the filtered content is returned, or
    None when ``_strip_image_content`` reports that nothing remains.
    """
    if isinstance(item, dict) and "content" in item:
        remaining = _strip_image_content(item.get("content"))
        if remaining is None:
            return None
        # Copy so the caller's dict is never mutated.
        return {**item, "content": remaining}
    return item
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def redacted_content_text(value: Any) -> str:
    """Flatten message content into display text with images replaced.

    * None yields an empty string.
    * A string is passed through ``_redact_data_urls``.
    * A dict yields ``[图片1]`` when it is an image part, otherwise its
      redacted text (empty when it has none).
    * A list yields one line per dict part: images become numbered
      placeholders, parts with text contribute that text after redaction, and
      non-dict entries or parts without text are skipped.
    * Any other value is stringified and redacted.
    """
    if value is None:
        return ""
    if isinstance(value, str):
        return _redact_data_urls(value)
    if isinstance(value, dict):
        if _part_is_image(value):
            return "[图片1]"
        single_text = _text_part(value)
        return _redact_data_urls(single_text) if single_text else ""
    if isinstance(value, list):
        lines: list[str] = []
        images_seen = 0
        for part in value:
            if not isinstance(part, dict):
                continue
            if _part_is_image(part):
                # Placeholders are numbered from 1 in order of appearance.
                images_seen += 1
                lines.append(f"[图片{images_seen}]")
            else:
                part_text = _text_part(part)
                if part_text:
                    lines.append(_redact_data_urls(part_text))
        return "\n".join(lines)
    return _redact_data_urls(str(value))
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def redact_image_data_urls(value: Any) -> Any:
    """Recursively copy a JSON-like value with image URLs redacted.

    Strings go through ``_redact_data_urls``; lists and dicts are rebuilt
    element by element. A dict recognised as an image part has its
    ``image_url`` replaced by the ``[图片]`` placeholder, either directly (when
    it is a string) or in its ``url`` field (when it is a dict). Other values
    are returned unchanged.
    """
    if isinstance(value, str):
        return _redact_data_urls(value)
    if isinstance(value, list):
        return list(map(redact_image_data_urls, value))
    if not isinstance(value, dict):
        return value

    redacted: dict[Any, Any] = {}
    for key, item in value.items():
        redacted[key] = redact_image_data_urls(item)
    if not _part_is_image(redacted):
        return redacted

    url_field = redacted.get("image_url")
    if isinstance(url_field, str):
        redacted["image_url"] = "[图片]"
    elif isinstance(url_field, dict):
        redacted["image_url"] = {**url_field, "url": "[图片]"}
    return redacted
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def normalize_multimodal_content_blocks(content: Any) -> Any:
    """Rewrite ``input_text``/``input_image`` parts as ``text``/``image_url`` parts.

    Non-list content is returned untouched. Within a list:

    * non-dict entries are kept as they are;
    * ``input_text`` parts become ``{"type": "text", "text": ...}``, reading
      ``text`` first and ``input_text`` as a fallback (non-string values
      become an empty string);
    * ``input_image`` parts become ``{"type": "image_url", "image_url":
      {"url": ...}}`` (a non-string URL becomes an empty string);
    * every other part is kept unchanged.

    When the list holds an image but no non-blank text, a default text part
    built from ``IMAGE_ONLY_DEFAULT_TEXT`` is inserted at the front.
    """
    if not isinstance(content, list):
        return content

    blocks: list[Any] = []
    saw_text = False
    saw_image = False
    for part in content:
        if not isinstance(part, dict):
            blocks.append(part)
            continue

        kind = part.get("type")
        if kind == "input_text":
            raw_text = part.get("text")
            if raw_text is None:
                raw_text = part.get("input_text")
            text_value = raw_text if isinstance(raw_text, str) else ""
            if text_value.strip():
                saw_text = True
            blocks.append({"type": "text", "text": text_value})
        elif kind == "input_image":
            url = part.get("image_url")
            saw_image = True
            blocks.append(
                {
                    "type": "image_url",
                    "image_url": {"url": url if isinstance(url, str) else ""},
                }
            )
        else:
            # Parts in any other shape pass through, but still count
            # towards the text/image presence flags.
            if _part_is_image(part):
                saw_image = True
            elif _text_part(part).strip():
                saw_text = True
            blocks.append(part)

    if saw_image and not saw_text:
        blocks.insert(0, {"type": "text", "text": IMAGE_ONLY_DEFAULT_TEXT})
    return blocks
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def _content_contains_image(content: Any) -> bool:
    """Report whether content is, or contains, an image part.

    A list is scanned for dict entries that are image parts; a single dict is
    tested directly; anything else yields False.
    """
    if isinstance(content, list):
        for part in content:
            if isinstance(part, dict) and _part_is_image(part):
                return True
        return False
    if isinstance(content, dict):
        return _part_is_image(content)
    return False
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _strip_image_content(content: Any) -> Any | None:
    """Remove image parts from content, returning None when nothing is left.

    A list is filtered into a new list (None if that list is empty); a dict
    that is itself an image part yields None; all other content is returned
    unchanged.
    """
    if isinstance(content, dict):
        return None if _part_is_image(content) else content
    if not isinstance(content, list):
        return content

    kept: list[Any] = []
    for part in content:
        if isinstance(part, dict) and _part_is_image(part):
            continue
        kept.append(part)
    return kept if kept else None
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def _part_is_image(part: dict[str, Any]) -> bool:
|
|
266
|
+
part_type = part.get("type")
|
|
267
|
+
return part_type in {"input_image", "image", "image_url"} or "image_url" in part
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def _text_part(part: dict[str, Any]) -> str:
|
|
271
|
+
for key in ("text", "input_text", "output_text", "refusal"):
|
|
272
|
+
value = part.get(key)
|
|
273
|
+
if isinstance(value, str):
|
|
274
|
+
return value
|
|
275
|
+
return ""
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def _redact_data_urls(text: str) -> str:
|
|
279
|
+
return IMAGE_DATA_URL_RE.sub("data:image/...;base64,", text)
|
|
@@ -8,8 +8,14 @@ from agents import Model, ModelSettings
|
|
|
8
8
|
from agents import OpenAIChatCompletionsModel
|
|
9
9
|
|
|
10
10
|
from deepy.config import Settings
|
|
11
|
+
from deepy.config.settings import infer_provider_from_base_url
|
|
11
12
|
|
|
12
13
|
from .cache_context import capture_sdk_request_shape
|
|
14
|
+
from .multimodal import (
|
|
15
|
+
items_contain_image_content,
|
|
16
|
+
model_supports_image_input,
|
|
17
|
+
strip_image_content_from_items,
|
|
18
|
+
)
|
|
13
19
|
from .replay import (
|
|
14
20
|
sanitize_chat_completion_stream_event,
|
|
15
21
|
sanitize_model_input_for_chat_completions,
|
|
@@ -43,10 +49,21 @@ class DeepyOpenAIChatCompletionsModel(OpenAIChatCompletionsModel):
|
|
|
43
49
|
*args: Any,
|
|
44
50
|
**kwargs: Any,
|
|
45
51
|
) -> Any:
|
|
52
|
+
model_name = str(getattr(self, "model", ""))
|
|
53
|
+
base_url = str(getattr(self._get_client(), "base_url", "") or "")
|
|
54
|
+
inferred_provider = infer_provider_from_base_url(base_url) or (
|
|
55
|
+
"openrouter" if _is_openrouter_base_url(base_url) else ""
|
|
56
|
+
)
|
|
57
|
+
if (
|
|
58
|
+
isinstance(input, list)
|
|
59
|
+
and items_contain_image_content(input)
|
|
60
|
+
and not model_supports_image_input(inferred_provider, model_name)
|
|
61
|
+
):
|
|
62
|
+
input = strip_image_content_from_items(input)
|
|
46
63
|
capture_sdk_request_shape(
|
|
47
64
|
system_instructions=system_instructions,
|
|
48
65
|
input=input,
|
|
49
|
-
model=
|
|
66
|
+
model=model_name,
|
|
50
67
|
model_settings=args[0] if args else None,
|
|
51
68
|
tools=args[1] if len(args) > 1 and isinstance(args[1], list) else None,
|
|
52
69
|
mcp_servers=None,
|
|
@@ -4,6 +4,8 @@ from collections import Counter
|
|
|
4
4
|
from collections.abc import Iterable
|
|
5
5
|
from typing import Any, cast
|
|
6
6
|
|
|
7
|
+
from deepy.llm.multimodal import normalize_multimodal_content_blocks
|
|
8
|
+
|
|
7
9
|
|
|
8
10
|
def sanitize_model_input_for_chat_completions(input_value: Any) -> Any:
|
|
9
11
|
if not isinstance(input_value, list):
|
|
@@ -92,10 +94,18 @@ def _normalize_chat_tool_items(items: Iterable[Any]) -> list[Any]:
|
|
|
92
94
|
}
|
|
93
95
|
)
|
|
94
96
|
continue
|
|
95
|
-
normalized.append(item)
|
|
97
|
+
normalized.append(_normalize_multimodal_item(item))
|
|
96
98
|
return normalized
|
|
97
99
|
|
|
98
100
|
|
|
101
|
+
def _normalize_multimodal_item(item: Any) -> Any:
    """Return the item with its content blocks normalized.

    Dicts carrying a ``"content"`` key are shallow-copied with that content
    passed through ``normalize_multimodal_content_blocks``; everything else is
    returned unchanged.
    """
    if not isinstance(item, dict) or "content" not in item:
        return item
    return {**item, "content": normalize_multimodal_content_blocks(item.get("content"))}
|
|
107
|
+
|
|
108
|
+
|
|
99
109
|
def sanitize_chat_completion_stream_event(event: Any) -> Any | None:
|
|
100
110
|
if getattr(event, "type", None) == "response.output_item.done" and _is_empty_assistant_message(
|
|
101
111
|
getattr(event, "item", None)
|
|
@@ -30,6 +30,11 @@ from .cache_context import (
|
|
|
30
30
|
from .compaction import ContextCompactionError, ensure_context_ready
|
|
31
31
|
from .context import build_session_input_callback
|
|
32
32
|
from .events import DeepyStreamEvent, normalize_stream_event
|
|
33
|
+
from .multimodal import (
|
|
34
|
+
PromptImageAttachment,
|
|
35
|
+
build_user_input,
|
|
36
|
+
supports_image_input,
|
|
37
|
+
)
|
|
33
38
|
from .provider import ProviderBundle, build_provider_bundle
|
|
34
39
|
|
|
35
40
|
DEFAULT_MAX_TURNS = 100
|
|
@@ -73,6 +78,7 @@ async def run_prompt_once(
|
|
|
73
78
|
list[ApprovalDecision] | Awaitable[list[ApprovalDecision]],
|
|
74
79
|
]
|
|
75
80
|
| None = None,
|
|
81
|
+
image_attachments: list[PromptImageAttachment] | None = None,
|
|
76
82
|
) -> RunSummary:
|
|
77
83
|
from agents import RunConfig, Runner
|
|
78
84
|
from agents.exceptions import MaxTurnsExceeded, ModelBehaviorError
|
|
@@ -86,6 +92,9 @@ async def run_prompt_once(
|
|
|
86
92
|
)
|
|
87
93
|
audit_policy = AuditPolicy(lambda: audit_state.mode, resolved_settings.audit)
|
|
88
94
|
session = DeepySession.open(root, session_id) if session_id else DeepySession.create(root)
|
|
95
|
+
effective_image_attachments = (
|
|
96
|
+
list(image_attachments or []) if supports_image_input(resolved_settings) else []
|
|
97
|
+
)
|
|
89
98
|
initial_todos, _ = normalize_todo_items(session.todo_state())
|
|
90
99
|
runtime = ToolRuntime(
|
|
91
100
|
cwd=root,
|
|
@@ -134,7 +143,7 @@ async def run_prompt_once(
|
|
|
134
143
|
prefix_snapshot=prefix_snapshot,
|
|
135
144
|
prefix_tools=list(getattr(agent, "tools", []) or []),
|
|
136
145
|
prefix_mcp_servers=list(getattr(agent, "mcp_servers", []) or []),
|
|
137
|
-
additional_input=prompt,
|
|
146
|
+
additional_input=build_user_input(prompt, effective_image_attachments),
|
|
138
147
|
)
|
|
139
148
|
except ContextCompactionError as exc:
|
|
140
149
|
duration_ms = int((time.time() - started_at) * 1000) if "started_at" in locals() else 0
|
|
@@ -175,7 +184,7 @@ async def run_prompt_once(
|
|
|
175
184
|
prefix_token: Any | None = None
|
|
176
185
|
try:
|
|
177
186
|
prefix_token = set_current_cache_prefix_snapshot(prefix_snapshot)
|
|
178
|
-
run_input: Any = prompt
|
|
187
|
+
run_input: Any = build_user_input(prompt, effective_image_attachments)
|
|
179
188
|
while True:
|
|
180
189
|
result = Runner.run_streamed(
|
|
181
190
|
agent,
|
|
@@ -527,7 +536,7 @@ def _approval_server_name(raw_item: Any, tool_name: str) -> str:
|
|
|
527
536
|
def _approval_action_kind(tool_name: str) -> str:
|
|
528
537
|
if tool_name in {"Write", "Update"}:
|
|
529
538
|
return "text_write"
|
|
530
|
-
if tool_name
|
|
539
|
+
if tool_name in {"shell", "test_shell"}:
|
|
531
540
|
return "command"
|
|
532
541
|
if tool_name == "task_stop":
|
|
533
542
|
return "background_task_control"
|
|
@@ -66,10 +66,9 @@ Core rules:
|
|
|
66
66
|
`subagent_explore` for broad read-only investigation, `subagent_reviewer` for
|
|
67
67
|
focused review, and `subagent_tester` for reproduction or verification. Keep
|
|
68
68
|
Deepy responsible for final synthesis and do not delegate tiny one-step work.
|
|
69
|
-
- If a subagent
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
the user approves; do not broaden access to raw shell.
|
|
69
|
+
- If a subagent runs `test_shell`, let medium-risk command approvals surface
|
|
70
|
+
through Deepy's audit flow. Do not rerun a blocked tester command through raw
|
|
71
|
+
`shell`; keep verification inside the constrained `test_shell` path.
|
|
73
72
|
|
|
74
73
|
Tool protocol:
|
|
75
74
|
Tool results are JSON strings: ok, name, output, error, metadata, awaitUserResponse.
|
|
@@ -454,6 +454,14 @@ class DeepySession:
|
|
|
454
454
|
increment_cache_generation: bool = False,
|
|
455
455
|
reset_cache_generation: bool = False,
|
|
456
456
|
) -> None:
|
|
457
|
+
clear_usage_state = (
|
|
458
|
+
active_tokens is not None
|
|
459
|
+
and usage is None
|
|
460
|
+
and latest_context_window_tokens is None
|
|
461
|
+
and last_usage_tokens is None
|
|
462
|
+
and last_usage_record_count is None
|
|
463
|
+
and cache_usage is None
|
|
464
|
+
)
|
|
457
465
|
with self._transaction() as conn:
|
|
458
466
|
self._update_session_metadata(
|
|
459
467
|
conn,
|
|
@@ -475,6 +483,19 @@ class DeepySession:
|
|
|
475
483
|
increment_cache_generation=increment_cache_generation,
|
|
476
484
|
reset_cache_generation=reset_cache_generation,
|
|
477
485
|
)
|
|
486
|
+
if clear_usage_state:
|
|
487
|
+
conn.execute(
|
|
488
|
+
"""
|
|
489
|
+
update sessions
|
|
490
|
+
set usage_json = null,
|
|
491
|
+
latest_context_window_tokens = null,
|
|
492
|
+
last_usage_tokens = null,
|
|
493
|
+
last_usage_record_count = null,
|
|
494
|
+
cache_usage_json = null
|
|
495
|
+
where id = ?
|
|
496
|
+
""",
|
|
497
|
+
(self.session_id,),
|
|
498
|
+
)
|
|
478
499
|
|
|
479
500
|
@contextmanager
|
|
480
501
|
def _transaction(self) -> Iterator[sqlite3.Connection]:
|
|
@@ -5,6 +5,7 @@ from typing import Any
|
|
|
5
5
|
|
|
6
6
|
from deepy.todos import todo_state_from_tool_output
|
|
7
7
|
from deepy.utils import json as json_utils
|
|
8
|
+
from deepy.llm.multimodal import redacted_content_text
|
|
8
9
|
|
|
9
10
|
CONTEXT_UNDERCOUNT_REPAIR_RATIO = 2
|
|
10
11
|
CONTEXT_UNDERCOUNT_REPAIR_MIN_DELTA = 128
|
|
@@ -173,16 +174,8 @@ def session_status(items: list[dict[str, Any]]) -> str:
|
|
|
173
174
|
def item_text(item: dict[str, Any]) -> str:
|
|
174
175
|
for key in ("content", "text", "output"):
|
|
175
176
|
value = item.get(key)
|
|
176
|
-
if
|
|
177
|
-
return value
|
|
178
|
-
if isinstance(value, list):
|
|
179
|
-
parts: list[str] = []
|
|
180
|
-
for part in value:
|
|
181
|
-
if isinstance(part, dict):
|
|
182
|
-
text = part.get("text") or part.get("input_text")
|
|
183
|
-
if isinstance(text, str):
|
|
184
|
-
parts.append(text)
|
|
185
|
-
return "".join(parts)
|
|
177
|
+
if value is not None:
|
|
178
|
+
return redacted_content_text(value)
|
|
186
179
|
return ""
|
|
187
180
|
|
|
188
181
|
|