chcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chcode/__init__.py +0 -0
- chcode/__main__.py +5 -0
- chcode/agent_setup.py +395 -0
- chcode/agents/__init__.py +0 -0
- chcode/agents/definitions.py +158 -0
- chcode/agents/loader.py +104 -0
- chcode/agents/runner.py +159 -0
- chcode/chat.py +1630 -0
- chcode/cli.py +142 -0
- chcode/config.py +571 -0
- chcode/display.py +325 -0
- chcode/prompts.py +640 -0
- chcode/session.py +149 -0
- chcode/skill_manager.py +165 -0
- chcode/utils/__init__.py +3 -0
- chcode/utils/enhanced_chat_openai.py +368 -0
- chcode/utils/git_checker.py +38 -0
- chcode/utils/git_manager.py +261 -0
- chcode/utils/modelscope_ratelimit.py +65 -0
- chcode/utils/multimodal.py +268 -0
- chcode/utils/shell/__init__.py +17 -0
- chcode/utils/shell/output.py +63 -0
- chcode/utils/shell/provider.py +128 -0
- chcode/utils/shell/result.py +14 -0
- chcode/utils/shell/semantics.py +55 -0
- chcode/utils/shell/session.py +159 -0
- chcode/utils/skill_loader.py +565 -0
- chcode/utils/text_utils.py +14 -0
- chcode/utils/tool_result_pipeline.py +244 -0
- chcode/utils/tools.py +1724 -0
- chcode/vision_config.py +371 -0
- chcode-0.1.0.dist-info/METADATA +275 -0
- chcode-0.1.0.dist-info/RECORD +36 -0
- chcode-0.1.0.dist-info/WHEEL +4 -0
- chcode-0.1.0.dist-info/entry_points.txt +2 -0
- chcode-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Git可用性检查工具
|
|
5
|
+
用于判断系统中Git是否可用
|
|
6
|
+
"""
|
|
7
|
+
import subprocess
|
|
8
|
+
from typing import Tuple, Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def check_git_availability() -> Tuple[bool, str, Optional[str]]:
    """Probe the system for a usable Git executable.

    Runs ``git --version`` with a 10-second timeout. Every failure mode is
    reported through the return value; the function does not raise.

    Returns:
        Tuple[bool, str, Optional[str]]: ``(available, status message,
        version string)``. The version string is the stripped stdout of
        ``git --version`` on success and ``None`` otherwise.
    """
    command = ["git", "--version"]
    try:
        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=10,
        )
    except FileNotFoundError:
        # The executable could not be located on PATH.
        return False, "未找到Git命令,请确保Git已安装并添加到PATH环境变量中", None
    except subprocess.TimeoutExpired:
        return False, "Git命令执行超时", None
    except Exception as exc:
        return False, f"检查Git时发生异常: {exc!s}", None

    if proc.returncode != 0:
        # Non-zero exit: surface stderr when there is any.
        detail = proc.stderr.strip() if proc.stderr else "未知错误"
        return False, f"Git命令执行失败: {detail}", None

    return True, "Git可用", proc.stdout.strip()
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
import subprocess
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
import json
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class GitManager:
    """Git-backed checkpoint manager with minimal ``.gitignore`` handling.

    Checkpoints are ordinary commits. A JSON file stored at
    ``<repo_path>/.git/checkpoints.json`` maps a checkpoint key (message ids
    joined with ``&``, plus the special key ``"init"`` for the root commit)
    to a commit hash.
    """

    # Default ignore rules written by create_gitignore() when no content is given.
    MINIMAL_GITIGNORE = ".git\n.chat\n.venv\n.gitignore\n__pycache__\n*.pyc\n.pytest_cache\n.coverage\n.pytest_cache/\n"

    def __init__(self, repo_path: str = "."):
        """Bind the manager to ``repo_path`` (resolved to an absolute path)."""
        self.repo_path = Path(repo_path).resolve()
        self.git_cmd = "git"
        self.checkpoints_file = self.repo_path / ".git" / "checkpoints.json"
        self.gitignore_file = self.repo_path / ".gitignore"
        self.current_id = 0
        # Cached repository probe; None means "not probed yet".
        self._is_repo: bool | None = None

    def _run(
        self, args: list, timeout: int = 30, silent: bool = True
    ) -> subprocess.CompletedProcess:
        """Run a git command inside ``repo_path``.

        Args:
            args: Arguments placed after the ``git`` executable.
            timeout: Timeout in seconds.
            silent: When False, print the return code, stderr and stdout of
                a failing command.

        Returns:
            The completed process; a non-zero exit code is NOT an error here.

        Raises:
            RuntimeError: If the command times out or cannot be executed.
        """
        try:
            result = subprocess.run(
                [self.git_cmd] + args,
                cwd=str(self.repo_path),
                capture_output=True,
                text=True,
                encoding="utf-8",
                # Git may echo file names or messages that are not valid
                # UTF-8; substitute instead of failing the whole command.
                errors="replace",
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as exc:
            raise RuntimeError(
                f"Git命令超时({timeout}秒): git {' '.join(args)}"
            ) from exc
        except Exception as exc:
            raise RuntimeError(f"Git命令执行失败: {exc}") from exc

        if result.returncode != 0 and not silent:
            print(f"Git命令返回码: {result.returncode}")
            if result.stderr:
                print(f" STDERR: {result.stderr.strip()}")
            if result.stdout:
                print(f" STDOUT: {result.stdout.strip()}")

        return result

    def _load_checkpoints(self) -> dict:
        """Read the checkpoint mapping; an absent file yields an empty dict."""
        if not self.checkpoints_file.exists():
            return {}
        return json.loads(self.checkpoints_file.read_text(encoding="utf-8"))

    def _save_checkpoints(self, checkpoints: dict) -> None:
        """Write the checkpoint mapping back to disk as indented JSON."""
        self.checkpoints_file.write_text(
            json.dumps(checkpoints, indent=4), encoding="utf-8"
        )

    def _reset_and_save(self, target: str, checkpoints: dict) -> bool | int:
        """Hard-reset to ``target``; persist ``checkpoints`` only on success.

        Returns the number of remaining entries, or False when the reset or
        the write fails.
        """
        try:
            if self._run(["reset", "--hard", target]).returncode != 0:
                return False
            self._save_checkpoints(checkpoints)
        except (RuntimeError, OSError):
            return False
        return len(checkpoints)

    def is_repo(self) -> bool:
        """Return whether ``repo_path`` is inside a Git repository (cached)."""
        if self._is_repo is not None:
            return self._is_repo
        try:
            self._is_repo = self._run(["rev-parse", "--git-dir"]).returncode == 0
            return self._is_repo
        except Exception:
            # Probe could not run (e.g. git missing); do not cache the failure.
            return False

    def init(self) -> bool:
        """Initialise a Git repository when there is none yet.

        Returns:
            True only when a new repository was created by this call. For an
            existing repository the checkpoint file and its ``"init"`` entry
            are ensured and False is returned.
        """
        if self.is_repo():
            self._ensure_init_checkpoint()
            return False

        if not self.gitignore_file.exists():
            self.create_gitignore()

        if self._run(["init"]).returncode != 0:
            return False

        # is_repo() cached False before the repository existed; refresh it,
        # otherwise it would keep reporting "not a repo" after a successful init.
        self._is_repo = True
        # Empty root commit so later commits and rollbacks have a base.
        self._run(["commit", "-m", "init", "--allow-empty"])
        self._ensure_init_checkpoint()
        return True

    def _ensure_init_checkpoint(self) -> None:
        """Make sure checkpoints.json has an ``"init"`` entry for rollback."""
        if not self.checkpoints_file.exists():
            self._save_checkpoints({})
        data = self._load_checkpoints()
        if "init" in data:
            return
        # Root commit(s) of HEAD; the last listed hash is recorded.
        hash_result = self._run(["rev-list", "--max-parents=0", "HEAD"])
        if hash_result.returncode == 0 and hash_result.stdout.strip():
            data["init"] = hash_result.stdout.strip().split("\n")[-1]
            self._save_checkpoints(data)

    def add_commit(self, message_ids: str, files: list | None = None) -> bool | int:
        """Stage ``files`` (default: everything) and commit them as a checkpoint.

        Args:
            message_ids: Checkpoint key, also used in the commit message.
            files: Paths passed to ``git add``; ``None`` means ``["."]``.

        Returns:
            The number of recorded checkpoints after the commit, or False when
            staging, committing or reading the new hash fails.
        """
        if files is None:
            files = ["."]
        if self._run(["add"] + files).returncode != 0:
            return False

        commit_msg = f"{message_ids} (CP#{self.current_id + 1})"
        if self._run(["commit", "-m", commit_msg]).returncode != 0:
            return False

        hash_result = self._run(["rev-parse", "HEAD"])
        if hash_result.returncode != 0:
            return False

        # Load the stored mapping first and then set the new entry, so a key
        # that already exists is updated to the new commit rather than being
        # overwritten by the stale hash from disk.
        checkpoints = self._load_checkpoints()
        checkpoints[message_ids] = hash_result.stdout.strip()
        self._save_checkpoints(checkpoints)

        self.current_id += 1
        return len(checkpoints)

    def rollback(self, message_ids: list[str], all_ids: list[str]) -> bool | int:
        """Roll the working tree back to the checkpoint for ``message_ids``.

        Step 1 (exact match): when the ids joined with ``&`` are a key of the
        checkpoint file, hard-reset to the parent of that commit and drop the
        entry together with every entry at or after the fork position.

        Step 2 (position match): otherwise entries are classified by the
        position of their first message id in ``all_ids`` relative to
        ``message_ids[0]``:

        * entries before and at/after the fork: reset to the nearest earlier
          checkpoint and drop the at/after entries;
        * no entry before the fork: reset to the ``"init"`` commit;
        * no entry at/after the fork: nothing to undo, no reset.

        A fork id missing from ``all_ids`` gets position -1, so every known
        entry then counts as at/after.

        Returns:
            The number of remaining checkpoint entries, or False when the
            checkpoint file is missing, ``message_ids`` is empty, the
            ``"init"`` entry is required but absent, or the reset fails.
        """
        if not message_ids or not self.checkpoints_file.exists():
            return False

        checkpoints: dict = self._load_checkpoints()
        exact_key = "&".join(message_ids)

        # First position of every id, equivalent to list.index() but O(1).
        positions: dict = {}
        for pos, msg_id in enumerate(all_ids):
            positions.setdefault(msg_id, pos)
        fork_index = positions.get(message_ids[0], -1)

        before: list = []
        at_or_after: list = []
        for key in checkpoints:
            if key == "init":
                continue
            pos = positions.get(key.split("&")[0])
            if pos is None:
                continue
            if pos < fork_index:
                before.append((pos, key))
            else:
                at_or_after.append(key)
        before.sort(key=lambda item: item[0])

        # -- Step 1: exact match --
        if exact_key in checkpoints:
            target = checkpoints[exact_key] + "~1"
            for key in {*at_or_after, exact_key}:
                checkpoints.pop(key, None)
            return self._reset_and_save(target, checkpoints)

        # -- Step 2: position match --
        if not at_or_after:
            return len(checkpoints)

        if before:
            # Keep the state of the nearest earlier checkpoint itself.
            target = checkpoints[before[-1][1]]
        else:
            target = checkpoints.get("init")
            if target is None:
                return False

        for key in at_or_after:
            checkpoints.pop(key)
        return self._reset_and_save(target, checkpoints)

    def count_checkpoints(self, count: int | None = None) -> int:
        """Return ``count`` when given, else the number of stored entries."""
        if count is not None:
            return count
        return len(self._load_checkpoints())

    def create_gitignore(self, content: Optional[str] = None) -> bool:
        """Write ``.gitignore`` (default: MINIMAL_GITIGNORE) and try to stage it.

        Returns:
            True when the file was written and the ``git add`` call could be
            executed (its exit code is not checked); False on failure.
        """
        try:
            if content is None:
                content = self.MINIMAL_GITIGNORE

            with open(self.gitignore_file, "w", encoding="utf-8") as f:
                f.write(content)

            self._run(["add", ".gitignore"])
            return True
        except Exception as e:
            print(f"创建.gitignore失败: {e}")
            return False
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""ModelScope API 调用次数监控(解耦模块)
|
|
2
|
+
|
|
3
|
+
通过自定义 httpx Transport 捕获响应头中的 ratelimit 信息,
|
|
4
|
+
供状态栏实时显示。仅在 base_url 包含 modelscope 时启用。
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
import threading
|
|
11
|
+
|
|
12
|
+
# Most recently captured rate-limit values; read and written under _ratelimit_lock.
_ratelimit_data: dict = {}
_ratelimit_lock = threading.Lock()

# httpx clients created on first use by get_modelscope_clients() and reused
# afterwards; creation is serialized by _client_lock.
_cached_sync: httpx.Client | None = None
_cached_async: httpx.AsyncClient | None = None
_client_lock = threading.Lock()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_ratelimit() -> dict:
    """Return a copy of the most recently captured rate-limit values.

    The copy is taken while holding the rate-limit lock; an empty dict is
    returned when nothing has been captured yet.
    """
    with _ratelimit_lock:
        snapshot = dict(_ratelimit_data)
    return snapshot
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def is_modelscope_model(model_config: dict) -> bool:
    """Return True when the config's ``base_url`` mentions ModelScope.

    The check is a case-insensitive substring test. A missing, ``None`` or
    empty ``base_url`` yields False instead of raising.
    """
    base_url = model_config.get("base_url") or ""
    return "modelscope" in str(base_url).lower()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _update_ratelimit(headers: httpx.Headers) -> None:
    """Store the ModelScope rate-limit counters found in ``headers``.

    Nothing happens when the total-limit header is absent or empty. When any
    counter is not an integer the whole update is skipped, so the stored
    values are never partially refreshed.
    """
    total_limit = headers.get("modelscope-ratelimit-requests-limit")
    if not total_limit:
        return

    other_counters = {
        "total_remaining": "modelscope-ratelimit-requests-remaining",
        "model_limit": "modelscope-ratelimit-model-requests-limit",
        "model_remaining": "modelscope-ratelimit-model-requests-remaining",
    }
    try:
        parsed = {"total_limit": int(total_limit)}
        for field, header_name in other_counters.items():
            # Counters other than the total limit default to 0 when missing.
            parsed[field] = int(headers.get(header_name, 0))
    except (ValueError, TypeError):
        # Best effort: malformed headers are ignored.
        return

    with _ratelimit_lock:
        _ratelimit_data.update(parsed)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class _HeaderCaptureTransport(httpx.HTTPTransport):
    """Synchronous transport that feeds every response's headers to _update_ratelimit."""

    def handle_request(self, request):
        """Delegate to the parent transport, then record the response headers."""
        upstream_response = super().handle_request(request)
        _update_ratelimit(upstream_response.headers)
        return upstream_response
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class _HeaderCaptureAsyncTransport(httpx.AsyncHTTPTransport):
    """Async transport that feeds every response's headers to _update_ratelimit."""

    async def handle_async_request(self, request):
        """Await the parent transport, then record the response headers."""
        upstream_response = await super().handle_async_request(request)
        _update_ratelimit(upstream_response.headers)
        return upstream_response
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def get_modelscope_clients() -> tuple[httpx.Client, httpx.AsyncClient]:
    """Return the shared ``(sync, async)`` httpx clients, creating them on first use.

    Both clients use the header-capturing transports defined in this module.
    Creation happens under ``_client_lock``.
    """
    global _cached_sync, _cached_async
    with _client_lock:
        both_ready = _cached_sync is not None and _cached_async is not None
        if not both_ready:
            _cached_sync = httpx.Client(transport=_HeaderCaptureTransport())
            _cached_async = httpx.AsyncClient(
                transport=_HeaderCaptureAsyncTransport()
            )
        return _cached_sync, _cached_async
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Multimodal model detection and media encoding utilities.
|
|
3
|
+
|
|
4
|
+
Provides:
|
|
5
|
+
- is_multimodal_model(): Check if a model name indicates native vision capability
|
|
6
|
+
- encode_media_as_base64(): Read and base64-encode an image or video
|
|
7
|
+
- extract_media_paths(): Detect image/video file paths in user text
|
|
8
|
+
- build_multimodal_message(): Construct a HumanMessage with embedded media
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import base64
|
|
14
|
+
import io
|
|
15
|
+
import re
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
from langchain_core.messages import HumanMessage
|
|
19
|
+
|
|
20
|
+
# ─── Multimodal model patterns ──────────────────────────────────
|
|
21
|
+
|
|
22
|
+
# Models whose short name (after /) or full name contains one of these
# patterns are considered multimodal (native vision capability).
# Matching is a case-insensitive substring test (see is_multimodal_model),
# so each entry also matches any longer name that contains it.
MULTIMODAL_MODEL_PATTERNS: list[str] = [
    # Kimi K2.5 series
    # NOTE(review): as a substring, "Kimi-K2" also matches other "Kimi-K2*"
    # names, not only K2.5 - confirm that this is intended.
    "Kimi-K2",
    # Qwen3 VL (dedicated vision-language)
    "Qwen3-VL",
    # Qwen3.5 MoE models with vision
    "Qwen3.5-397B",
    "Qwen3.5-122B",
    "Qwen3.5-35B",
    "Qwen3.5-27B",
    # Intern-S1 series
    "Intern-S1",
]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def is_multimodal_model(model_name: str) -> bool:
    """Tell whether ``model_name`` refers to a natively multimodal model.

    Both the full name (e.g. "moonshotai/Kimi-K2.5") and its last
    "/"-separated segment (e.g. "Kimi-K2.5") are compared against
    MULTIMODAL_MODEL_PATTERNS using a case-insensitive substring test.
    An empty name is never multimodal.
    """
    if not model_name:
        return False

    full_lowered = model_name.lower()
    short_lowered = model_name.split("/")[-1].lower()
    candidates = (short_lowered, full_lowered)

    return any(
        pattern.lower() in candidate
        for pattern in MULTIMODAL_MODEL_PATTERNS
        for candidate in candidates
    )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# ─── Supported formats ─────────────────────────────────────────
|
|
58
|
+
|
|
59
|
+
# File suffixes (lower-case, including the leading dot) treated as images.
_IMAGE_EXTS = frozenset({
    ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp", ".tiff", ".tif",
})

# File suffixes (lower-case, including the leading dot) treated as videos.
_VIDEO_EXTS = frozenset({
    ".mp4", ".mov", ".avi", ".mkv", ".webm",
})

# Every suffix recognised as media.
_ALL_MEDIA_EXTS = _IMAGE_EXTS | _VIDEO_EXTS

# Video extensions without the leading dot, for comparison against a
# suffix that has already had its dot stripped.
_VIDEO_EXT_NAMES = frozenset(e.lstrip(".") for e in _VIDEO_EXTS)

# Dot-less extension -> MIME type used when building data URLs.
_MIME_MAP: dict[str, str] = {
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "png": "image/png",
    "gif": "image/gif",
    "bmp": "image/bmp",
    "webp": "image/webp",
    "tiff": "image/tiff",
    "tif": "image/tiff",
    "mp4": "video/mp4",
    "mov": "video/quicktime",
    "avi": "video/x-msvideo",
    "mkv": "video/x-matroska",
    "webm": "video/webm",
}
|
|
86
|
+
|
|
87
|
+
# ─── Media encoding ────────────────────────────────────────────
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def encode_media_as_base64(
    path: Path,
    max_side: int = 2048,
) -> tuple[str, str]:
    """Read an image or video and return ``(base64_data, mime_type)``.

    Videos are encoded unchanged and must not exceed 14.9 MB. Images whose
    longer side exceeds ``max_side`` pixels are scaled down first and then
    re-encoded in their source format. If the encoded image is still larger
    than 5 MB it is re-encoded as JPEG with decreasing quality, stopping at
    the first result that fits (the lowest-quality attempt is returned even
    when it does not fit). The returned MIME type always describes the bytes
    that were actually encoded.

    Args:
        path: Media file to encode.
        max_side: Maximum width/height in pixels for images.

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the path is not a file, has an unsupported suffix,
            or is a video that is too large.
        IOError: If the image cannot be read/decoded.
    """
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")
    if not path.is_file():
        raise ValueError(f"Not a file: {path}")

    ext = path.suffix.lower().lstrip(".")
    if ext not in _MIME_MAP:
        raise ValueError(f"Unsupported media format: {path.suffix}")

    mime_type = _MIME_MAP[ext]

    if ext in _VIDEO_EXT_NAMES:
        max_video_bytes = 14.9 * 1024 * 1024
        file_size = path.stat().st_size
        if file_size > max_video_bytes:
            raise ValueError(
                f"Video too large: {file_size / 1024 / 1024:.1f}MB (max 14.9MB)"
            )
        return base64.b64encode(path.read_bytes()).decode("utf-8"), mime_type

    from PIL import Image

    max_image_bytes = 5 * 1024 * 1024

    # The context manager closes the underlying file handle when done.
    with Image.open(path) as source:
        # ``format`` is only set on images loaded from a file; the object
        # returned by resize() reports None. Capture it before resizing so a
        # scaled JPEG is not silently written as PNG under a JPEG label.
        save_format = source.format or "PNG"

        img = source
        w, h = source.size
        if max(w, h) > max_side:
            scale = max_side / max(w, h)
            # Clamp to 1 px so extreme aspect ratios never produce a 0 side.
            new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
            img = source.resize(new_size, Image.LANCZOS)

        buf = io.BytesIO()
        img.save(buf, format=save_format)
        # Label the payload with the format really written, which can differ
        # from what the file suffix suggests.
        mime_type = Image.MIME.get(save_format.upper(), mime_type)

        if buf.tell() > max_image_bytes:
            # JPEG has no alpha/palette support; normalise other modes.
            if img.mode not in ("RGB", "L"):
                img = img.convert("RGB")
            for quality in range(85, 4, -15):
                buf = io.BytesIO()
                img.save(buf, format="JPEG", quality=quality)
                if buf.tell() <= max_image_bytes:
                    break
            mime_type = "image/jpeg"

    b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
    return b64, mime_type
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
# ─── Media path extraction ─────────────────────────────────────
|
|
155
|
+
|
|
156
|
+
# Alternation of all media extensions (without the dot).
# Longest extensions come first: regex alternation accepts the first branch
# that matches, so with "tif" ahead of "tiff" a bare path "x.tiff" would be
# captured as "x.tif" and then fail the on-disk existence check.
_MEDIA_EXT_PATTERN = "|".join(
    re.escape(ext.lstrip("."))
    for ext in sorted(_ALL_MEDIA_EXTS, key=lambda e: (-len(e), e))
)

_MEDIA_PATH_PATTERN = re.compile(
    r"(?:"
    # Quoted path: "path/to/image.png" or 'path\to\image.jpg'
    # Use [^"']+ to allow spaces inside quoted paths
    # Groups 1-2: the quote character and the path between the quotes.
    r'(["\'])([^"\']+\.(?:' + _MEDIA_EXT_PATTERN + r'))\1'
    r"|"
    # Bare path: must start with /, \, ~, ./, ../, or a drive letter (C:)
    # This skips plain references like output.png. The pattern is not
    # anchored, so it can still start at a slash inside a longer token
    # (for example a URL); callers filter matches by on-disk existence.
    # Note: bare paths cannot contain spaces (use quotes for that)
    # Group 3: the bare path.
    r'((?:[/~\\]|[.]{1,2}[/\\]|[A-Za-z]:[/\\])[^\s]*\.(?:' + _MEDIA_EXT_PATTERN + r'))'
    r")",
    re.IGNORECASE,
)
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def extract_media_paths(text: str, working_directory: Path) -> list[Path]:
    """Collect the media files referenced in ``text`` that exist on disk.

    Quoted and bare path candidates are taken from _MEDIA_PATH_PATTERN.
    ``~`` is expanded and relative candidates are joined onto
    ``working_directory``. A candidate is kept only when it is an existing
    regular file with a recognised media suffix. Duplicates are detected by
    resolved path, and results keep their order of first appearance.
    """
    results: list[Path] = []
    resolved_seen: set[str] = set()

    for hit in _MEDIA_PATH_PATTERN.finditer(text):
        # Group 2 holds a quoted path, group 3 a bare one.
        candidate_text = hit.group(2) or hit.group(3)
        if not candidate_text:
            continue

        candidate = Path(candidate_text).expanduser()
        if not candidate.is_absolute():
            candidate = working_directory / candidate

        dedupe_key = str(candidate.resolve())
        if dedupe_key in resolved_seen:
            continue

        is_media_file = (
            candidate.exists()
            and candidate.is_file()
            and candidate.suffix.lower() in _ALL_MEDIA_EXTS
        )
        if not is_media_file:
            continue

        resolved_seen.add(dedupe_key)
        results.append(candidate)

    return results
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# ─── Multimodal message builder ─────────────────────────────────
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def build_multimodal_message(
    text: str,
    media_paths: list[Path],
    max_side: int = 2048,
) -> HumanMessage:
    """Build a HumanMessage with text and embedded media.

    Every occurrence of a media path in ``text`` is replaced with
    ``[image: filename]`` or ``[video: filename]``. For each path the first
    spelling found in the text is used, tried in this order: double-quoted
    full path, single-quoted full path, bare full path, bare file name.
    Images are attached as ``image_url`` blocks and videos as ``video_url``
    blocks, each carrying a base64 data URL.

    Args:
        text: The user's message text.
        media_paths: Media files to embed, in attachment order.
        max_side: Forwarded to encode_media_as_base64 for image downscaling.

    Returns:
        A HumanMessage whose content is the rewritten text block followed by
        one block per media file.
    """
    # Spelling found in the text -> marker that replaces it.
    marker_for: dict[str, str] = {}
    for media_path in media_paths:
        kind = "video" if media_path.suffix.lower() in _VIDEO_EXTS else "image"
        marker = f"[{kind}: {media_path.name}]"
        full = str(media_path)
        for spelling in (f'"{full}"', f"'{full}'", full, media_path.name):
            # Skip empty spellings: "" is contained in every string.
            if spelling and spelling in text:
                marker_for.setdefault(spelling, marker)
                break

    clean_text = text
    if marker_for:
        # One regex pass over the ORIGINAL text, longest spelling first.
        # Sequential str.replace calls could rewrite a file name inside a
        # marker inserted by an earlier replacement, for example
        # "[image: [image: a.png]]".
        alternation = "|".join(
            re.escape(spelling)
            for spelling in sorted(marker_for, key=len, reverse=True)
        )
        clean_text = re.sub(
            alternation, lambda hit: marker_for[hit.group(0)], text
        )

    content_blocks: list[dict] = [{"type": "text", "text": clean_text}]

    for media_path in media_paths:
        b64, mime = encode_media_as_base64(media_path, max_side=max_side)
        data_url = f"data:{mime};base64,{b64}"
        block_type = (
            "video_url" if media_path.suffix.lower() in _VIDEO_EXTS else "image_url"
        )
        content_blocks.append({"type": block_type, block_type: {"url": data_url}})

    return HumanMessage(content=content_blocks)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from chcode.utils.shell.output import TruncatedOutput, truncate_output
|
|
2
|
+
from chcode.utils.shell.provider import BashProvider, PowerShellProvider, ShellProvider
|
|
3
|
+
from chcode.utils.shell.result import ShellResult
|
|
4
|
+
from chcode.utils.shell.semantics import Interpretation, interpret_command_result
|
|
5
|
+
from chcode.utils.shell.session import ShellSession
|
|
6
|
+
|
|
7
|
+
# Names re-exported from the submodules imported above.
__all__ = [
    "ShellProvider",
    "BashProvider",
    "PowerShellProvider",
    "ShellSession",
    "ShellResult",
    "Interpretation",
    "interpret_command_result",
    "TruncatedOutput",
    "truncate_output",
]
|