ripperdoc 0.2.0__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ripperdoc/__init__.py +1 -1
- ripperdoc/cli/cli.py +74 -9
- ripperdoc/cli/commands/__init__.py +4 -0
- ripperdoc/cli/commands/agents_cmd.py +30 -4
- ripperdoc/cli/commands/context_cmd.py +11 -1
- ripperdoc/cli/commands/cost_cmd.py +5 -0
- ripperdoc/cli/commands/doctor_cmd.py +208 -0
- ripperdoc/cli/commands/memory_cmd.py +202 -0
- ripperdoc/cli/commands/models_cmd.py +61 -6
- ripperdoc/cli/commands/resume_cmd.py +4 -2
- ripperdoc/cli/commands/status_cmd.py +1 -1
- ripperdoc/cli/commands/tasks_cmd.py +27 -0
- ripperdoc/cli/ui/rich_ui.py +258 -11
- ripperdoc/cli/ui/thinking_spinner.py +128 -0
- ripperdoc/core/agents.py +14 -4
- ripperdoc/core/config.py +56 -3
- ripperdoc/core/default_tools.py +16 -2
- ripperdoc/core/permissions.py +19 -0
- ripperdoc/core/providers/__init__.py +31 -0
- ripperdoc/core/providers/anthropic.py +136 -0
- ripperdoc/core/providers/base.py +187 -0
- ripperdoc/core/providers/gemini.py +172 -0
- ripperdoc/core/providers/openai.py +142 -0
- ripperdoc/core/query.py +510 -386
- ripperdoc/core/query_utils.py +578 -0
- ripperdoc/core/system_prompt.py +2 -1
- ripperdoc/core/tool.py +16 -1
- ripperdoc/sdk/client.py +12 -1
- ripperdoc/tools/background_shell.py +63 -21
- ripperdoc/tools/bash_tool.py +48 -13
- ripperdoc/tools/file_edit_tool.py +20 -0
- ripperdoc/tools/file_read_tool.py +23 -0
- ripperdoc/tools/file_write_tool.py +20 -0
- ripperdoc/tools/glob_tool.py +59 -15
- ripperdoc/tools/grep_tool.py +7 -0
- ripperdoc/tools/ls_tool.py +246 -73
- ripperdoc/tools/mcp_tools.py +32 -10
- ripperdoc/tools/multi_edit_tool.py +23 -0
- ripperdoc/tools/notebook_edit_tool.py +18 -3
- ripperdoc/tools/task_tool.py +7 -0
- ripperdoc/tools/todo_tool.py +157 -25
- ripperdoc/tools/tool_search_tool.py +17 -4
- ripperdoc/utils/file_watch.py +134 -0
- ripperdoc/utils/git_utils.py +274 -0
- ripperdoc/utils/json_utils.py +27 -0
- ripperdoc/utils/log.py +129 -29
- ripperdoc/utils/mcp.py +71 -6
- ripperdoc/utils/memory.py +12 -1
- ripperdoc/utils/message_compaction.py +22 -5
- ripperdoc/utils/messages.py +72 -17
- ripperdoc/utils/output_utils.py +34 -9
- ripperdoc/utils/permissions/path_validation_utils.py +6 -0
- ripperdoc/utils/prompt.py +17 -0
- ripperdoc/utils/safe_get_cwd.py +4 -0
- ripperdoc/utils/session_history.py +27 -9
- ripperdoc/utils/session_usage.py +7 -0
- ripperdoc/utils/shell_utils.py +159 -0
- ripperdoc/utils/todo.py +2 -2
- {ripperdoc-0.2.0.dist-info → ripperdoc-0.2.3.dist-info}/METADATA +4 -2
- ripperdoc-0.2.3.dist-info/RECORD +95 -0
- ripperdoc-0.2.0.dist-info/RECORD +0 -81
- {ripperdoc-0.2.0.dist-info → ripperdoc-0.2.3.dist-info}/WHEEL +0 -0
- {ripperdoc-0.2.0.dist-info → ripperdoc-0.2.3.dist-info}/entry_points.txt +0 -0
- {ripperdoc-0.2.0.dist-info → ripperdoc-0.2.3.dist-info}/licenses/LICENSE +0 -0
- {ripperdoc-0.2.0.dist-info → ripperdoc-0.2.3.dist-info}/top_level.txt +0 -0
ripperdoc/utils/json_utils.py
ADDED

@@ -0,0 +1,27 @@
+"""JSON helper utilities for Ripperdoc."""
+
+from __future__ import annotations
+
+import json
+from typing import Any, Optional
+
+from ripperdoc.utils.log import get_logger
+
+
+logger = get_logger()
+
+
+def safe_parse_json(json_text: Optional[str], log_error: bool = True) -> Optional[Any]:
+    """Best-effort JSON.parse wrapper that returns None on failure."""
+    if not json_text:
+        return None
+    try:
+        return json.loads(json_text)
+    except Exception as exc:
+        if log_error:
+            logger.debug(
+                "[json_utils] Failed to parse JSON",
+                extra={"error": str(exc), "length": len(json_text)},
+                exc_info=True,
+            )
+        return None
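
The new safe_parse_json helper swallows parse failures and optionally logs them at debug level. A minimal usage sketch (the inputs below are made up; the import path follows the file location above):

```python
# Illustrative only: safe_parse_json returns the decoded value on success and
# None on any failure, so callers can branch without writing their own try/except.
from ripperdoc.utils.json_utils import safe_parse_json

parsed = safe_parse_json('{"model": "sonnet", "retries": 2}')
assert parsed == {"model": "sonnet", "retries": 2}

assert safe_parse_json("{not valid json", log_error=False) is None  # failure, nothing logged
assert safe_parse_json(None) is None  # falsy input short-circuits before json.loads
```
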
ripperdoc/utils/log.py
CHANGED

@@ -1,11 +1,64 @@
 """Logging utilities for Ripperdoc."""

+import json
 import logging
 import sys
 import os
-from pathlib import Path
-from typing import Optional
 from datetime import datetime
+from pathlib import Path
+from typing import Any, Optional
+
+from ripperdoc.utils.path_utils import sanitize_project_path
+
+
+_LOG_RECORD_FIELDS = {
+    "name",
+    "msg",
+    "args",
+    "levelname",
+    "levelno",
+    "pathname",
+    "filename",
+    "module",
+    "exc_info",
+    "exc_text",
+    "stack_info",
+    "lineno",
+    "funcName",
+    "created",
+    "msecs",
+    "relativeCreated",
+    "thread",
+    "threadName",
+    "processName",
+    "process",
+    "message",
+    "asctime",
+    "stacklevel",
+}
+
+
+class StructuredFormatter(logging.Formatter):
+    """Formatter with ISO timestamps and context."""
+
+    def formatTime(self, record: logging.LogRecord, datefmt: Optional[str] = None) -> str:
+        timestamp = datetime.utcfromtimestamp(record.created)
+        return timestamp.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
+
+    def format(self, record: logging.LogRecord) -> str:
+        message = super().format(record)
+        extras = {
+            key: value
+            for key, value in record.__dict__.items()
+            if key not in _LOG_RECORD_FIELDS and not key.startswith("_")
+        }
+        if extras:
+            try:
+                serialized = json.dumps(extras, sort_keys=True, ensure_ascii=True, default=str)
+            except Exception:
+                serialized = str(extras)
+            return f"{message} | {serialized}"
+        return message


 class RipperdocLogger:
@@ -15,46 +68,72 @@ class RipperdocLogger:
         self.logger = logging.getLogger(name)
         level_name = os.getenv("RIPPERDOC_LOG_LEVEL", "WARNING").upper()
         level = getattr(logging, level_name, logging.WARNING)
-
+        # Allow file handlers to capture debug logs while console respects the configured level.
+        self.logger.setLevel(logging.DEBUG)
+        self.logger.propagate = False
+
+        # Avoid adding duplicate handlers if an existing logger is reused.
+        if not self.logger.handlers:
+            console_handler = logging.StreamHandler(sys.stderr)
+            console_handler.setLevel(level)
+            console_formatter = logging.Formatter("%(levelname)s: %(message)s")
+            console_handler.setFormatter(console_formatter)
+            self.logger.addHandler(console_handler)

-
-
-        console_handler.setLevel(level)
-        console_formatter = logging.Formatter("%(levelname)s: %(message)s")
-        console_handler.setFormatter(console_formatter)
-        self.logger.addHandler(console_handler)
+        self._file_handler: Optional[logging.Handler] = None
+        self._file_handler_path: Optional[Path] = None

-        # File handler (optional)
         if log_dir:
-            log_dir.mkdir(exist_ok=True)
+            log_dir.mkdir(parents=True, exist_ok=True)
             log_file = log_dir / f"ripperdoc_{datetime.now().strftime('%Y%m%d')}.log"
-
-
-
-
-
-
-
-
-
+            self.attach_file_handler(log_file)
+
+    def attach_file_handler(self, log_file: Path) -> Path:
+        """Attach or replace a file handler for logging to disk."""
+        log_file.parent.mkdir(parents=True, exist_ok=True)
+        if self._file_handler and self._file_handler_path == log_file:
+            return log_file
+
+        if self._file_handler:
+            try:
+                self.logger.removeHandler(self._file_handler)
+            except Exception:
+                # Swallow errors while rotating handlers; console logging should continue.
+                self.logger.exception("[logging] Failed to remove existing file handler")
+
+        # Use UTF-8 to avoid Windows code page encoding errors when logs contain non-ASCII text.
+        file_handler = logging.FileHandler(log_file, encoding="utf-8")
+        file_handler.setLevel(logging.DEBUG)
+        file_formatter = StructuredFormatter("%(asctime)s [%(levelname)s] %(message)s")
+        file_handler.setFormatter(file_formatter)
+        self.logger.addHandler(file_handler)
+        self._file_handler = file_handler
+        self._file_handler_path = log_file
+        return log_file
+
+    def debug(self, message: str, *args: Any, **kwargs: Any) -> None:
         """Log debug message."""
-        self.logger.debug(message)
+        self.logger.debug(message, *args, **kwargs)

-    def info(self, message: str) -> None:
+    def info(self, message: str, *args: Any, **kwargs: Any) -> None:
         """Log info message."""
-        self.logger.info(message)
+        self.logger.info(message, *args, **kwargs)

-    def warning(self, message: str) -> None:
+    def warning(self, message: str, *args: Any, **kwargs: Any) -> None:
         """Log warning message."""
-        self.logger.warning(message)
+        self.logger.warning(message, *args, **kwargs)

-    def error(self, message: str) -> None:
+    def error(self, message: str, *args: Any, **kwargs: Any) -> None:
         """Log error message."""
-        self.logger.error(message)
+        self.logger.error(message, *args, **kwargs)

-    def critical(self, message: str) -> None:
+    def critical(self, message: str, *args: Any, **kwargs: Any) -> None:
         """Log critical message."""
-        self.logger.critical(message)
+        self.logger.critical(message, *args, **kwargs)
+
+    def exception(self, message: str, *args: Any, **kwargs: Any) -> None:
+        """Log an exception with traceback."""
+        self.logger.exception(message, *args, **kwargs)


 # Global logger instance
@@ -69,8 +148,29 @@ def get_logger() -> RipperdocLogger:
     return _logger


+def _normalize_path_for_logs(project_path: Path) -> Path:
+    """Return the directory for log files for a given project."""
+    safe_name = sanitize_project_path(project_path)
+    return Path.home() / ".ripperdoc" / "logs" / safe_name
+
+
+def session_log_path(project_path: Path, session_id: str, when: Optional[datetime] = None) -> Path:
+    """Build the log file path for a project session."""
+    timestamp = (when or datetime.now()).strftime("%Y%m%d-%H%M%S")
+    return _normalize_path_for_logs(project_path) / f"{timestamp}-{session_id}.log"
+
+
 def init_logger(log_dir: Optional[Path] = None) -> RipperdocLogger:
     """Initialize the global logger."""
     global _logger
     _logger = RipperdocLogger(log_dir=log_dir)
     return _logger
+
+
+def enable_session_file_logging(project_path: Path, session_id: str) -> Path:
+    """Ensure the global logger writes to the session-specific log file."""
+    logger = get_logger()
+    log_file = session_log_path(project_path, session_id)
+    logger.attach_file_handler(log_file)
+    logger.debug(f"[logging] File logging enabled at {log_file}")
+    return log_file
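
Taken together, the log.py changes let a session route full debug output to a per-project file while the console stays at RIPPERDOC_LOG_LEVEL. A rough sketch based only on the signatures above (the session id and the sample output line are invented):

```python
# Sketch, not authoritative: enable_session_file_logging() builds a path like
# ~/.ripperdoc/logs/<sanitized-project>/<YYYYmmdd-HHMMSS>-<session_id>.log and
# attaches a DEBUG-level, UTF-8 file handler that uses StructuredFormatter.
from pathlib import Path

from ripperdoc.utils.log import enable_session_file_logging, get_logger

log_file = enable_session_file_logging(Path.cwd(), session_id="demo-session")
logger = get_logger()

# The logger methods now forward *args/**kwargs, so extra={} reaches the file
# formatter, which appends it as JSON after a " | " separator, roughly:
# 2025-01-01T00:00:00.000Z [INFO] [demo] started | {"session_id": "demo-session"}
logger.info("[demo] started", extra={"session_id": "demo-session"})
print(log_file)
```
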
ripperdoc/utils/mcp.py
CHANGED

@@ -14,6 +14,8 @@ from ripperdoc import __version__
 from ripperdoc.utils.log import get_logger
 from ripperdoc.utils.message_compaction import estimate_tokens_from_text

+logger = get_logger()
+
 try:
     import mcp.types as mcp_types
     from mcp.client.session import ClientSession
@@ -26,9 +28,7 @@ except Exception:  # pragma: no cover - handled gracefully at runtime
     MCP_AVAILABLE = False
     ClientSession = object  # type: ignore
     mcp_types = None  # type: ignore
-
-
-logger = get_logger()
+    logger.exception("[mcp] MCP SDK not available at import time")


 @dataclass
@@ -76,8 +76,8 @@ def _load_json_file(path: Path) -> Dict[str, Any]:
         if isinstance(data, dict):
             return data
         return {}
-    except (OSError, json.JSONDecodeError)
-        logger.
+    except (OSError, json.JSONDecodeError):
+        logger.exception("Failed to load JSON", extra={"path": str(path)})
         return {}


@@ -89,6 +89,10 @@ def _ensure_str_dict(raw: object) -> Dict[str, str]:
         try:
             result[str(key)] = str(value)
         except Exception:
+            logger.exception(
+                "[mcp] Failed to coerce env/header value to string",
+                extra={"key": key, "value": value},
+            )
             continue
     return result

@@ -154,6 +158,14 @@ def _load_server_configs(project_path: Optional[Path]) -> Dict[str, McpServerInfo]:
     for path in candidates:
         data = _load_json_file(path)
         merged.update(_parse_servers(data))
+    logger.debug(
+        "[mcp] Loaded MCP server configs",
+        extra={
+            "project_path": str(project_path),
+            "server_count": len(merged),
+            "candidates": [str(path) for path in candidates],
+        },
+    )
     return merged


@@ -168,6 +180,14 @@ class McpRuntime:
         self._closed = False

     async def connect(self, configs: Dict[str, McpServerInfo]) -> List[McpServerInfo]:
+        logger.info(
+            "[mcp] Connecting to MCP servers",
+            extra={
+                "project_path": str(self.project_path),
+                "server_count": len(configs),
+                "servers": list(configs.keys()),
+            },
+        )
         await self._exit_stack.__aenter__()
         if not MCP_AVAILABLE:
             for config in configs.values():
@@ -182,6 +202,14 @@ class McpRuntime:

         for config in configs.values():
             self.servers.append(await self._connect_server(config))
+        logger.debug(
+            "[mcp] MCP connection summary",
+            extra={
+                "connected": [s.name for s in self.servers if s.status == "connected"],
+                "failed": [s.name for s in self.servers if s.status == "failed"],
+                "unavailable": [s.name for s in self.servers if s.status == "unavailable"],
+            },
+        )
         return self.servers

     async def _list_roots_callback(self, *_: Any, **__: Any) -> Optional[Any]:
@@ -201,6 +229,15 @@ class McpRuntime:
         try:
             read_stream = None
             write_stream = None
+            logger.debug(
+                "[mcp] Connecting server",
+                extra={
+                    "server": config.name,
+                    "type": config.type,
+                    "command": config.command,
+                    "url": config.url,
+                },
+            )

             if config.type in ("sse", "sse-ide"):
                 if not config.url:
@@ -280,8 +317,21 @@ class McpRuntime:
                 for resource in resources_result.resources
             ]

+            logger.info(
+                "[mcp] Connected to MCP server",
+                extra={
+                    "server": config.name,
+                    "status": info.status,
+                    "tools": len(info.tools),
+                    "resources": len(info.resources),
+                    "capabilities": list(info.capabilities.keys()),
+                },
+            )
         except Exception as exc:  # pragma: no cover - network/process errors
-            logger.
+            logger.exception(
+                "Failed to connect to MCP server",
+                extra={"server": config.name, "error": str(exc)},
+            )
             info.status = "failed"
             info.error = str(exc)

@@ -291,6 +341,10 @@ class McpRuntime:
         if self._closed:
             return
         self._closed = True
+        logger.debug(
+            "[mcp] Shutting down MCP runtime",
+            extra={"project_path": str(self.project_path), "session_count": len(self.sessions)},
+        )
         try:
             await self._exit_stack.aclose()
         finally:
@@ -316,12 +370,23 @@ async def ensure_mcp_runtime(project_path: Optional[Path] = None) -> McpRuntime:
     runtime = _get_runtime()
     project_path = project_path or Path.cwd()
     if runtime and not runtime._closed and runtime.project_path == project_path:
+        logger.debug(
+            "[mcp] Reusing existing MCP runtime",
+            extra={
+                "project_path": str(project_path),
+                "server_count": len(runtime.servers),
+            },
+        )
         return runtime

     if runtime:
         await runtime.aclose()

     runtime = McpRuntime(project_path)
+    logger.debug(
+        "[mcp] Creating MCP runtime",
+        extra={"project_path": str(project_path)},
+    )
     configs = _load_server_configs(project_path)
     await runtime.connect(configs)
     _runtime_var.set(runtime)
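
The runtime these new log lines describe comes from ensure_mcp_runtime, and each entry in runtime.servers carries the status values the connection summary groups by. A hedged sketch under the assumptions visible in this diff:

```python
# Assumptions from the hunks above: ensure_mcp_runtime() is a coroutine, the
# returned runtime exposes .servers, and each server record has name, status
# ("connected" / "failed" / "unavailable"), tools, and error attributes.
import asyncio
from pathlib import Path

from ripperdoc.utils.mcp import ensure_mcp_runtime


async def show_mcp_status() -> None:
    runtime = await ensure_mcp_runtime(Path.cwd())
    for server in runtime.servers:
        print(f"{server.name}: {server.status} ({len(server.tools)} tools)")
        if server.status == "failed":
            print(f"  error: {server.error}")


asyncio.run(show_mcp_status())
```
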
ripperdoc/utils/memory.py
CHANGED

@@ -6,6 +6,9 @@ import re
 from dataclasses import dataclass
 from pathlib import Path
 from typing import List, Optional, Set
+from ripperdoc.utils.log import get_logger
+
+logger = get_logger()

 MEMORY_FILE_NAME = "AGENTS.md"
 LOCAL_MEMORY_FILE_NAME = "AGENTS.local.md"
@@ -43,6 +46,10 @@ def _is_path_under_directory(path: Path, directory: Path) -> bool:
         path.resolve().relative_to(directory.resolve())
         return True
     except Exception:
+        logger.exception(
+            "[memory] Failed to compare path containment",
+            extra={"path": str(path), "directory": str(directory)},
+        )
         return False


@@ -65,8 +72,10 @@ def _read_file_with_type(file_path: Path, file_type: str) -> Optional[MemoryFile]:
         content = file_path.read_text(encoding="utf-8", errors="ignore")
         return MemoryFile(path=str(file_path), type=file_type, content=content)
     except PermissionError:
+        logger.exception("[memory] Permission error reading file", extra={"path": str(file_path)})
         return None
     except OSError:
+        logger.exception("[memory] OS error reading file", extra={"path": str(file_path)})
         return None


@@ -114,7 +123,9 @@ def _collect_files(
         try:
             resolved_path = resolved_path.resolve()
         except Exception:
-
+            logger.exception(
+                "[memory] Failed to resolve memory file path", extra={"path": str(resolved_path)}
+            )

         resolved_key = str(resolved_path)
         if resolved_key in visited:
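
The memory readers keep their old contract (return None when a file cannot be read) and now record why. An illustrative sketch; _read_file_with_type is an internal helper and the "project" file type below is an assumed value, used only to show the behaviour:

```python
# Illustration only: PermissionError/OSError raised while reading are swallowed,
# logged with the offending path via logger.exception, and surfaced as None.
from pathlib import Path

from ripperdoc.utils.memory import _read_file_with_type

missing = _read_file_with_type(Path("/nonexistent/AGENTS.md"), "project")
assert missing is None  # unreadable or missing files come back as None instead of raising
```
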
ripperdoc/utils/message_compaction.py
CHANGED

@@ -22,7 +22,7 @@ logger = get_logger()

 ConversationMessage = Union[UserMessage, AssistantMessage, ProgressMessage]

-# Compaction thresholds
+# Compaction thresholds.
 MAX_TOKENS_SOFT = 20_000
 MAX_TOKENS_HARD = 40_000
 MAX_TOOL_USES_TO_PRESERVE = 3
@@ -172,6 +172,9 @@ def _stringify_content(content: Union[str, List[MessageContent], None]) -> str:
             try:
                 parts.append(json.dumps(part.get("input"), ensure_ascii=False))
             except Exception:
+                logger.exception(
+                    "[message_compaction] Failed to serialize tool_use input for token estimate"
+                )
                 parts.append(str(part.get("input")))

         # OpenAI-style arguments blocks
@@ -225,7 +228,10 @@ def _estimate_tool_schema_tokens(tools: Sequence[Any]) -> int:
             schema_text = json.dumps(schema, sort_keys=True)
             total += estimate_tokens_from_text(schema_text)
         except Exception as exc:
-            logger.
+            logger.exception(
+                "Failed to estimate tokens for tool schema",
+                extra={"tool": getattr(tool, "name", None), "error": str(exc)},
+            )
             continue
     return total

@@ -303,7 +309,7 @@
     max_context_tokens: Optional[int],
     auto_compact_enabled: bool,
 ) -> ContextUsageStatus:
-    """Compute context usage thresholds
+    """Compute context usage thresholds using the compaction heuristics."""
     context_limit = max(max_context_tokens or DEFAULT_CONTEXT_TOKENS, MIN_CONTEXT_TOKENS)
     effective_limit = (
         max(MIN_CONTEXT_TOKENS, context_limit - AUTO_COMPACT_BUFFER)
@@ -396,6 +402,7 @@ def find_latest_assistant_usage_tokens(
             if tokens > 0:
                 return tokens
         except Exception:
+            logger.debug("[message_compaction] Failed to parse usage tokens", exc_info=True)
             continue
     return 0

@@ -432,7 +439,7 @@ def _run_cleanup_callbacks() -> None:
         try:
             callback()
         except Exception as exc:
-            logger.debug(f"[message_compaction] Cleanup callback failed: {exc}")
+            logger.debug(f"[message_compaction] Cleanup callback failed: {exc}", exc_info=True)


 def _normalize_tool_use_id(block: Any) -> str:
@@ -641,16 +648,26 @@ def compact_messages(
         _processed_tool_use_ids.add(id_to_remove)

     tokens_after = estimate_conversation_tokens(compacted_messages, protocol=protocol)
+    tokens_saved = max(0, tokens_before - tokens_after)

     if ids_to_remove:
         _is_compacting = True
         _run_cleanup_callbacks()
+        logger.debug(
+            "[message_compaction] Compacted conversation",
+            extra={
+                "tokens_before": tokens_before,
+                "tokens_after": tokens_after,
+                "tokens_saved": tokens_saved,
+                "cleared_tool_ids": list(ids_to_remove),
+            },
+        )

     return CompactionResult(
         messages=compacted_messages,
         tokens_before=tokens_before,
         tokens_after=tokens_after,
-        tokens_saved=
+        tokens_saved=tokens_saved,
         cleared_tool_ids=ids_to_remove,
         was_compacted=bool(ids_to_remove),
     )
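
The previously truncated tokens_saved field is now computed once and reused by both the new debug log and the returned CompactionResult. The arithmetic is a clamped difference; the token counts below are invented:

```python
# Illustration of the bookkeeping only: tokens_saved is
# max(0, tokens_before - tokens_after), so it never goes negative even if the
# post-compaction estimate happens to come out larger than the original.
tokens_before = 41_250
tokens_after = 18_400
tokens_saved = max(0, tokens_before - tokens_after)
assert tokens_saved == 22_850

assert max(0, 10_000 - 10_500) == 0  # degenerate case reports zero, not -500
```
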
ripperdoc/utils/messages.py
CHANGED

@@ -4,6 +4,7 @@ This module provides utilities for creating and normalizing messages
 for communication with AI models.
 """

+import json
 from typing import Any, Dict, List, Optional, Union
 from pydantic import BaseModel, ConfigDict
 from uuid import uuid4
@@ -30,7 +31,7 @@ class MessageContent(BaseModel):
     id: Optional[str] = None
     tool_use_id: Optional[str] = None
     name: Optional[str] = None
-    input: Optional[Dict[str,
+    input: Optional[Dict[str, object]] = None
     is_error: Optional[bool] = None


@@ -75,6 +76,7 @@ def _content_block_to_openai(block: MessageContent) -> Dict[str, Any]:
         try:
             args_str = json.dumps(args)
         except Exception:
+            logger.exception("[_content_block_to_openai] Failed to serialize tool arguments")
             args_str = "{}"
         tool_call_id = (
             getattr(block, "id", None) or getattr(block, "tool_use_id", "") or str(uuid4())
@@ -118,7 +120,7 @@ class Message(BaseModel):
     content: Union[str, List[MessageContent]]
     uuid: str = ""

-    def __init__(self, **data:
+    def __init__(self, **data: object) -> None:
         if "uuid" not in data or not data["uuid"]:
             data["uuid"] = str(uuid4())
         super().__init__(**data)
@@ -130,9 +132,9 @@ class UserMessage(BaseModel):
     type: str = "user"
     message: Message
     uuid: str = ""
-    tool_use_result: Optional[
+    tool_use_result: Optional[object] = None

-    def __init__(self, **data:
+    def __init__(self, **data: object) -> None:
         if "uuid" not in data or not data["uuid"]:
             data["uuid"] = str(uuid4())
         super().__init__(**data)
@@ -148,7 +150,7 @@ class AssistantMessage(BaseModel):
     duration_ms: float = 0.0
     is_api_error_message: bool = False

-    def __init__(self, **data:
+    def __init__(self, **data: object) -> None:
         if "uuid" not in data or not data["uuid"]:
             data["uuid"] = str(uuid4())
         super().__init__(**data)
@@ -165,14 +167,14 @@ class ProgressMessage(BaseModel):
     sibling_tool_use_ids: set[str] = set()
     model_config = ConfigDict(arbitrary_types_allowed=True)

-    def __init__(self, **data:
+    def __init__(self, **data: object) -> None:
         if "uuid" not in data or not data["uuid"]:
             data["uuid"] = str(uuid4())
         super().__init__(**data)


 def create_user_message(
-    content: Union[str, List[Dict[str, Any]]], tool_use_result: Optional[
+    content: Union[str, List[Dict[str, Any]]], tool_use_result: Optional[object] = None
 ) -> UserMessage:
     """Create a user message."""
     if isinstance(content, str):
@@ -187,7 +189,7 @@ def create_user_message(
             tool_use_result = tool_use_result.model_dump()
         except Exception:
             # Fallback: keep as-is if conversion fails
-
+            logger.exception("[create_user_message] Failed to normalize tool_use_result")

     message = Message(role=MessageRole.USER, content=message_content)

@@ -237,6 +239,7 @@ def create_progress_message(
 def normalize_messages_for_api(
     messages: List[Union[UserMessage, AssistantMessage, ProgressMessage]],
     protocol: str = "anthropic",
+    tool_mode: str = "native",
 ) -> List[Dict[str, Any]]:
     """Normalize messages for API submission.

@@ -261,6 +264,62 @@
             return msg.get("content")
         return None

+    def _block_type(block: Any) -> Optional[str]:
+        if hasattr(block, "type"):
+            return getattr(block, "type", None)
+        if isinstance(block, dict):
+            return block.get("type")
+        return None
+
+    def _block_attr(block: Any, attr: str, default: Any = None) -> Any:
+        if hasattr(block, attr):
+            return getattr(block, attr, default)
+        if isinstance(block, dict):
+            return block.get(attr, default)
+        return default
+
+    def _flatten_blocks_to_text(blocks: List[Any]) -> str:
+        parts: List[str] = []
+        for blk in blocks:
+            btype = _block_type(blk)
+            if btype == "text":
+                text = _block_attr(blk, "text") or _block_attr(blk, "content") or ""
+                if text:
+                    parts.append(str(text))
+            elif btype == "tool_result":
+                text = _block_attr(blk, "text") or _block_attr(blk, "content") or ""
+                tool_id = _block_attr(blk, "tool_use_id") or _block_attr(blk, "id")
+                prefix = "Tool error" if _block_attr(blk, "is_error") else "Tool result"
+                label = f"{prefix}{f' ({tool_id})' if tool_id else ''}"
+                parts.append(f"{label}: {text}" if text else label)
+            elif btype == "tool_use":
+                name = _block_attr(blk, "name") or ""
+                input_data = _block_attr(blk, "input")
+                input_preview = ""
+                if input_data not in (None, {}):
+                    try:
+                        input_preview = json.dumps(input_data)
+                    except Exception:
+                        input_preview = str(input_data)
+                tool_id = _block_attr(blk, "tool_use_id") or _block_attr(blk, "id")
+                desc = "Tool call"
+                if name:
+                    desc += f" {name}"
+                if tool_id:
+                    desc += f" ({tool_id})"
+                if input_preview:
+                    desc += f": {input_preview}"
+                parts.append(desc)
+            else:
+                text = _block_attr(blk, "text") or _block_attr(blk, "content") or ""
+                if text:
+                    parts.append(str(text))
+        return "\n".join(p for p in parts if p)
+
+    effective_tool_mode = (tool_mode or "native").lower()
+    if effective_tool_mode not in {"native", "text"}:
+        effective_tool_mode = "native"
+
     normalized: List[Dict[str, Any]] = []
     tool_results_seen = 0
     tool_uses_seen = 0
@@ -312,9 +371,7 @@
                     api_blocks.append(_content_block_to_api(block))
                 normalized.append({"role": "user", "content": api_blocks})
             else:
-                normalized.append(
-                    {"role": "user", "content": user_content}  # type: ignore
-                )
+                normalized.append({"role": "user", "content": user_content})  # type: ignore
         elif msg_type == "assistant":
             asst_content = _msg_content(msg)
             if isinstance(asst_content, list):
@@ -369,14 +426,12 @@
                     api_blocks.append(_content_block_to_api(block))
                 normalized.append({"role": "assistant", "content": api_blocks})
             else:
-                normalized.append(
-                    {"role": "assistant", "content": asst_content}  # type: ignore
-                )
+                normalized.append({"role": "assistant", "content": asst_content})  # type: ignore

     logger.debug(
-        f"[normalize_messages_for_api] protocol={protocol}
-        f"
-        f"tool_uses_seen={tool_uses_seen} "
+        f"[normalize_messages_for_api] protocol={protocol} tool_mode={effective_tool_mode} "
+        f"input_msgs={len(messages)} normalized={len(normalized)} "
+        f"tool_results_seen={tool_results_seen} tool_uses_seen={tool_uses_seen} "
         f"tool_result_positions={len(tool_result_positions)} "
         f"skipped_tool_uses_no_result={skipped_tool_uses_no_result} "
         f"skipped_tool_uses_no_id={skipped_tool_uses_no_id}"
|