chcode 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,244 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from pathlib import Path
5
+ from typing import TYPE_CHECKING
6
+
7
+ from chcode.utils import get_text_content
8
+
9
+ if TYPE_CHECKING:
10
+ from langchain_core.messages import BaseMessage
11
+
12
+
13
+ DEFAULT_MAX_RESULT_CHARS = 50_000  # default `threshold` of truncate_large_result; compared against the UTF-8 byte size from _content_size
14
+ MAX_RESULTS_PER_TURN_CHARS = 200_000  # default `budget` of enforce_per_turn_budget; also compared against _content_size totals
15
+ PREVIEW_MAX_CHARS = 2_000  # default `max_chars` of _generate_preview (a character count)
16
+ PERSISTED_OUTPUT_TAG = "<persisted-output>"  # first line of every message built by truncate_large_result for an oversized result
17
+ PERSISTED_OUTPUT_CLOSING_TAG = "</persisted-output>"  # closing marker appended to those messages
18
+
19
+
20
def clean_tool_output(text: str) -> str:
    """Strip terminal escape sequences and angle-bracket tags from tool output.

    Args:
        text: Raw tool output. A list is joined with newlines (each item passed
            through ``str``); any other non-string value is converted with
            ``str`` before cleaning.

    Returns:
        The cleaned text. Falsy input (``""``, ``None``, ``[]``) is returned
        unchanged.
    """
    if not text:
        return text
    if isinstance(text, list):
        text = "\n".join(str(item) for item in text)
    elif not isinstance(text, str):
        text = str(text)
    # CSI sequences (colors, cursor movement): ESC [ parameters final-letter.
    text = re.sub(r"\x1b\[[0-9;?]*[A-Za-z]", "", text)
    # OSC sequences (e.g. window titles): ESC ] payload, ended by BEL or by
    # ST (ESC \). The payload must stop at the next ESC as well as at BEL;
    # otherwise an ST-terminated sequence swallows all following output up to
    # the next BEL or the end of the text.
    text = re.sub(r"\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)?", "", text)
    # Remove every "<...>" span. This is not limited to known markup: any text
    # between "<" and the next ">" is dropped.
    text = re.sub(r"<[^>]+>", "", text)
    return text
31
+
32
+
33
def _content_size(content: str | list) -> int:
    """Return the UTF-8 encoded length, in bytes, of the content's text.

    The text is obtained from ``get_text_content``; its exact handling of list
    content is defined in ``chcode.utils`` and is not visible here.
    """
    return len(get_text_content(content).encode("utf-8"))
36
+
37
+
38
def _generate_preview(
    content: str, max_chars: int = PREVIEW_MAX_CHARS
) -> tuple[str, bool]:
    """Cut ``content`` down to a preview of at most ``max_chars`` characters.

    Returns:
        ``(preview, was_truncated)``. Content that already fits is returned
        whole with ``False``. Otherwise the preview ends at the last newline
        inside the first ``max_chars`` characters, provided that newline lies
        beyond the halfway point; failing that, it is a hard cut at
        ``max_chars``.
    """
    if len(content) <= max_chars:
        return content, False

    # Prefer ending on a line boundary, but only if that keeps more than half
    # of the allowed window.
    newline_at = content.rfind("\n", 0, max_chars)
    if newline_at > max_chars * 0.5:
        return content[:newline_at], True
    return content[:max_chars], True
47
+
48
+
49
+ def _persist_to_disk(
50
+ content: str, tool_use_id: str, workplace: Path | None
51
+ ) -> str | None:
52
+ if workplace is None:
53
+ return None
54
+ try:
55
+ result_dir = workplace / ".chat" / "tool-results"
56
+ result_dir.mkdir(parents=True, exist_ok=True)
57
+ safe_id = re.sub(r"[^a-zA-Z0-9_-]", "_", tool_use_id)
58
+ filepath = result_dir / f"{safe_id}.txt"
59
+ filepath.write_text(content, encoding="utf-8")
60
+ return str(filepath)
61
+ except Exception:
62
+ return None
63
+
64
+
65
def truncate_large_result(
    content: str,
    tool_name: str = "",
    tool_use_id: str = "",
    workplace: Path | None = None,
    threshold: int = DEFAULT_MAX_RESULT_CHARS,
) -> str:
    """Replace an oversized tool result with a short ``<persisted-output>`` message.

    Args:
        content: Tool result text.
        tool_name: Tool name, used only in the "no output" message.
        tool_use_id: Identifier passed to ``_persist_to_disk`` for the file name.
        workplace: Base directory for persisted results; ``None`` disables
            persisting.
        threshold: Maximum size, in UTF-8 bytes as measured by
            ``_content_size``, that is returned unchanged.

    Returns:
        * ``content`` itself when it is empty/``None`` or not above ``threshold``.
        * ``"(<tool_name> completed with no output)"`` for whitespace-only content.
        * Otherwise a message wrapped in the persisted-output tags: either a
          pointer to the saved file plus a preview, or, when saving was not
          possible, the content truncated to ``threshold`` bytes.
    """
    if not content:
        return content
    if not content.strip():
        return f"({tool_name} completed with no output)"

    size = _content_size(content)
    if size <= threshold:
        return content

    filepath = _persist_to_disk(content, tool_use_id, workplace)
    preview, has_more = _generate_preview(content)
    size_str = f"{size / 1024:.1f}KB" if size >= 1024 else f"{size}B"

    if filepath:
        message = (
            f"{PERSISTED_OUTPUT_TAG}\n"
            f"Output too large ({size_str}). Full output saved to: {filepath}\n\n"
            f"Preview (first {PREVIEW_MAX_CHARS} chars):\n"
            f"{preview}"
        )
        if has_more:
            message += "\n..."
        message += f"\n{PERSISTED_OUTPUT_CLOSING_TAG}"
        return message

    # The size check above is in UTF-8 bytes, so the fallback must cut by bytes
    # too. Slicing `threshold` characters can keep multi-byte text whole even
    # though it is over the limit. errors="ignore" drops a multi-byte character
    # split by the cut.
    fallback_preview = content.encode("utf-8")[:threshold].decode(
        "utf-8", errors="ignore"
    )
    message = (
        f"{PERSISTED_OUTPUT_TAG}\n"
        f"Output too large ({size_str}), truncated to {threshold} chars.\n\n"
        f"{fallback_preview}\n"
        f"...{PERSISTED_OUTPUT_CLOSING_TAG}"
    )
    return message
105
+
106
+
107
class BudgetState:
    """Mutable record of tool-call IDs and the replacement text chosen for them."""

    def __init__(self) -> None:
        # Tool-call IDs that have already been recorded.
        self.seen_ids: set[str] = set()
        # Replacement text keyed by tool-call ID.
        self.replacements: dict[str, str] = {}

    def reset(self) -> None:
        """Empty both collections in place, keeping the same container objects."""
        for store in (self.seen_ids, self.replacements):
            store.clear()
115
+
116
+
117
+ _budget_state: BudgetState | None = None  # module-level shared BudgetState; stays None until get_budget_state() or reset_budget_state() assigns one
118
+
119
+
120
def get_budget_state() -> BudgetState:
    """Return the module-level ``BudgetState``, creating it on first use."""
    global _budget_state
    state = _budget_state
    if state is None:
        state = _budget_state = BudgetState()
    return state
125
+
126
+
127
def reset_budget_state() -> None:
    """Rebind the module-level state to a brand-new ``BudgetState``.

    The previous object is not cleared; it is simply no longer the shared one.
    """
    global _budget_state
    fresh_state = BudgetState()
    _budget_state = fresh_state
130
+
131
+
132
def _collect_tool_messages_by_turn(
    messages: list[BaseMessage],
) -> list[list[tuple[int, BaseMessage]]]:
    """Group tool-related messages into turns, keeping their original indices.

    A turn opens with an ``AIMessage`` that has tool calls and collects the
    ``ToolMessage`` entries that follow it. Any other message closes the
    current turn. A ``ToolMessage`` with no open turn starts a group of its
    own.

    Returns:
        One list per turn of ``(index_in_messages, message)`` pairs; empty
        groups are never emitted.
    """
    from langchain_core.messages import AIMessage, ToolMessage

    groups: list[list[tuple[int, BaseMessage]]] = []
    pending: list[tuple[int, BaseMessage]] = []

    for position, message in enumerate(messages):
        opens_turn = isinstance(message, AIMessage) and message.tool_calls
        if not opens_turn and isinstance(message, ToolMessage):
            pending.append((position, message))
            continue

        # Either a new tool-calling AI message or an unrelated message:
        # both close whatever turn was being collected.
        if pending:
            groups.append(pending)
        pending = [(position, message)] if opens_turn else []

    if pending:
        groups.append(pending)

    return groups
156
+
157
+
158
def _select_to_replace(
    fresh: list[tuple[int, BaseMessage]],
    frozen_size: int,
    limit: int,
) -> list[tuple[int, BaseMessage]]:
    """Pick the largest fresh messages whose removal brings a turn under ``limit``.

    Args:
        fresh: ``(index, message)`` pairs that may still be replaced.
        frozen_size: Size already committed by messages that cannot change.
        limit: Maximum allowed total size for the turn.

    Returns:
        The chosen pairs, largest first (ties keep their input order). Empty
        when the turn already fits. If even replacing everything cannot cover
        the overflow, all of ``fresh`` is returned.
    """
    # Measure each message once; _content_size encodes the whole content, so
    # repeating it for the total, the sort key and the accumulation is wasteful.
    sized = [(_content_size(item[1].content or ""), item) for item in fresh]

    deficit = frozen_size + sum(size for size, _ in sized) - limit
    if deficit <= 0:
        return []

    sized.sort(key=lambda entry: entry[0], reverse=True)

    selected: list[tuple[int, BaseMessage]] = []
    reclaimed = 0
    for size, item in sized:
        selected.append(item)
        reclaimed += size
        if reclaimed >= deficit:
            break
    return selected
178
+
179
+
180
def enforce_per_turn_budget(
    messages: list[BaseMessage],
    budget: int = MAX_RESULTS_PER_TURN_CHARS,
    workplace: Path | None = None,
    state: BudgetState | None = None,
) -> list[BaseMessage]:
    """Keep the combined size of each turn's tool results within ``budget``.

    Tool results are grouped by turn. Results whose ID was recorded on an
    earlier call are left as decided then: a stored replacement is re-applied,
    and an unreplaced result counts towards the turn's fixed size. Among the
    results seen for the first time, the largest are replaced with a
    persisted-output preview until the turn fits.

    Args:
        messages: Conversation messages; never mutated.
        budget: Maximum total size per turn, as measured by ``_content_size``.
        workplace: Base directory for persisting replaced results.
        state: Bookkeeping to read and update; defaults to the shared state
            from ``get_budget_state()``.

    Returns:
        ``messages`` itself when nothing is replaced, otherwise a new list in
        which replaced messages are copies with updated ``content``.
    """
    from langchain_core.messages import ToolMessage

    if not any(isinstance(m, ToolMessage) for m in messages):
        return messages

    if state is None:
        state = get_budget_state()

    replacement_map: dict[int, str] = {}

    for turn in _collect_tool_messages_by_turn(messages):
        fresh: list[tuple[int, BaseMessage]] = []
        frozen_size = 0

        for idx, msg in turn:
            if not isinstance(msg, ToolMessage):
                continue
            tool_use_id = msg.tool_call_id or ""
            if tool_use_id not in state.seen_ids:
                fresh.append((idx, msg))
            elif tool_use_id in state.replacements:
                replacement_map[idx] = state.replacements[tool_use_id]
            else:
                frozen_size += _content_size(msg.content or "")

        if not fresh:
            continue

        selected = _select_to_replace(fresh, frozen_size, budget)
        selected_indices = {idx for idx, _ in selected}

        # Results that stay as they are become frozen for later calls.
        for idx, msg in fresh:
            if idx not in selected_indices:
                state.seen_ids.add(msg.tool_call_id or "")

        for idx, msg in selected:
            tool_use_id = msg.tool_call_id or ""
            state.seen_ids.add(tool_use_id)
            content = get_text_content(msg.content or "")
            if content.startswith(PERSISTED_OUTPUT_TAG):
                # Already a preview of a persisted result. Persisting it again
                # under the same ID would overwrite the saved full output with
                # the preview text, so leave it as is.
                continue
            # Use the preview size as the threshold. With the default
            # per-result threshold, anything below it comes back unchanged,
            # so selecting it would reclaim nothing.
            replacement = truncate_large_result(
                content,
                msg.name or "",
                tool_use_id,
                workplace=workplace,
                threshold=PREVIEW_MAX_CHARS,
            )
            replacement_map[idx] = replacement
            state.replacements[tool_use_id] = replacement

    if not replacement_map:
        return messages

    return [
        msg.model_copy(update={"content": replacement_map[idx]})
        if idx in replacement_map
        else msg
        for idx, msg in enumerate(messages)
    ]