ripperdoc-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ripperdoc/__init__.py +3 -0
- ripperdoc/__main__.py +25 -0
- ripperdoc/cli/__init__.py +1 -0
- ripperdoc/cli/cli.py +317 -0
- ripperdoc/cli/commands/__init__.py +76 -0
- ripperdoc/cli/commands/agents_cmd.py +234 -0
- ripperdoc/cli/commands/base.py +19 -0
- ripperdoc/cli/commands/clear_cmd.py +18 -0
- ripperdoc/cli/commands/compact_cmd.py +19 -0
- ripperdoc/cli/commands/config_cmd.py +31 -0
- ripperdoc/cli/commands/context_cmd.py +114 -0
- ripperdoc/cli/commands/cost_cmd.py +77 -0
- ripperdoc/cli/commands/exit_cmd.py +19 -0
- ripperdoc/cli/commands/help_cmd.py +20 -0
- ripperdoc/cli/commands/mcp_cmd.py +65 -0
- ripperdoc/cli/commands/models_cmd.py +327 -0
- ripperdoc/cli/commands/resume_cmd.py +97 -0
- ripperdoc/cli/commands/status_cmd.py +167 -0
- ripperdoc/cli/commands/tasks_cmd.py +240 -0
- ripperdoc/cli/commands/todos_cmd.py +69 -0
- ripperdoc/cli/commands/tools_cmd.py +19 -0
- ripperdoc/cli/ui/__init__.py +1 -0
- ripperdoc/cli/ui/context_display.py +297 -0
- ripperdoc/cli/ui/helpers.py +22 -0
- ripperdoc/cli/ui/rich_ui.py +1010 -0
- ripperdoc/cli/ui/spinner.py +50 -0
- ripperdoc/core/__init__.py +1 -0
- ripperdoc/core/agents.py +306 -0
- ripperdoc/core/commands.py +33 -0
- ripperdoc/core/config.py +382 -0
- ripperdoc/core/default_tools.py +57 -0
- ripperdoc/core/permissions.py +227 -0
- ripperdoc/core/query.py +682 -0
- ripperdoc/core/system_prompt.py +418 -0
- ripperdoc/core/tool.py +214 -0
- ripperdoc/sdk/__init__.py +9 -0
- ripperdoc/sdk/client.py +309 -0
- ripperdoc/tools/__init__.py +1 -0
- ripperdoc/tools/background_shell.py +291 -0
- ripperdoc/tools/bash_output_tool.py +98 -0
- ripperdoc/tools/bash_tool.py +822 -0
- ripperdoc/tools/file_edit_tool.py +281 -0
- ripperdoc/tools/file_read_tool.py +168 -0
- ripperdoc/tools/file_write_tool.py +141 -0
- ripperdoc/tools/glob_tool.py +134 -0
- ripperdoc/tools/grep_tool.py +232 -0
- ripperdoc/tools/kill_bash_tool.py +136 -0
- ripperdoc/tools/ls_tool.py +298 -0
- ripperdoc/tools/mcp_tools.py +804 -0
- ripperdoc/tools/multi_edit_tool.py +393 -0
- ripperdoc/tools/notebook_edit_tool.py +325 -0
- ripperdoc/tools/task_tool.py +282 -0
- ripperdoc/tools/todo_tool.py +362 -0
- ripperdoc/tools/tool_search_tool.py +366 -0
- ripperdoc/utils/__init__.py +1 -0
- ripperdoc/utils/bash_constants.py +51 -0
- ripperdoc/utils/bash_output_utils.py +43 -0
- ripperdoc/utils/exit_code_handlers.py +241 -0
- ripperdoc/utils/log.py +76 -0
- ripperdoc/utils/mcp.py +427 -0
- ripperdoc/utils/memory.py +239 -0
- ripperdoc/utils/message_compaction.py +640 -0
- ripperdoc/utils/messages.py +399 -0
- ripperdoc/utils/output_utils.py +233 -0
- ripperdoc/utils/path_utils.py +46 -0
- ripperdoc/utils/permissions/__init__.py +21 -0
- ripperdoc/utils/permissions/path_validation_utils.py +165 -0
- ripperdoc/utils/permissions/shell_command_validation.py +74 -0
- ripperdoc/utils/permissions/tool_permission_utils.py +279 -0
- ripperdoc/utils/safe_get_cwd.py +24 -0
- ripperdoc/utils/sandbox_utils.py +38 -0
- ripperdoc/utils/session_history.py +223 -0
- ripperdoc/utils/session_usage.py +110 -0
- ripperdoc/utils/shell_token_utils.py +95 -0
- ripperdoc/utils/todo.py +199 -0
- ripperdoc-0.1.0.dist-info/METADATA +178 -0
- ripperdoc-0.1.0.dist-info/RECORD +81 -0
- ripperdoc-0.1.0.dist-info/WHEEL +5 -0
- ripperdoc-0.1.0.dist-info/entry_points.txt +3 -0
- ripperdoc-0.1.0.dist-info/licenses/LICENSE +53 -0
- ripperdoc-0.1.0.dist-info/top_level.txt +1 -0
ripperdoc/utils/session_history.py
ADDED

@@ -0,0 +1,223 @@
"""Session log storage and retrieval."""

from __future__ import annotations

import json
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import List, Optional

from ripperdoc.utils.log import get_logger
from ripperdoc.utils.messages import (
    AssistantMessage,
    ProgressMessage,
    UserMessage,
)
from ripperdoc.utils.path_utils import project_storage_dir


logger = get_logger()

ConversationMessage = UserMessage | AssistantMessage | ProgressMessage


@dataclass
class SessionSummary:
    session_id: str
    path: Path
    message_count: int
    created_at: datetime
    updated_at: datetime
    first_prompt: str


def _sessions_root() -> Path:
    return Path.home() / ".ripperdoc" / "sessions"


def _session_file(project_path: Path, session_id: str) -> Path:
    directory = project_storage_dir(_sessions_root(), project_path, ensure=True)
    return directory / f"{session_id}.jsonl"


def _now_iso() -> str:
    return datetime.utcnow().isoformat() + "Z"


def _extract_prompt(payload: dict) -> str:
    """Pull a short preview of the first user message."""
    if payload.get("type") != "user":
        return ""
    message = payload.get("message") or {}
    content = message.get("content")
    preview = ""
    if isinstance(content, str):
        preview = content
    elif isinstance(content, list):
        for block in content:
            if not isinstance(block, dict):
                continue
            if block.get("type") == "text" and block.get("text"):
                preview = str(block["text"])
                break
    preview = (preview or "").replace("\n", " ").strip()
    if len(preview) > 80:
        preview = preview[:77] + "..."
    return preview


def _deserialize_message(payload: dict) -> Optional[ConversationMessage]:
    """Rebuild a message model from a stored payload."""
    msg_type = payload.get("type")
    if msg_type == "user":
        return UserMessage(**payload)
    if msg_type == "assistant":
        return AssistantMessage(**payload)
    if msg_type == "progress":
        return ProgressMessage(**payload)
    return None


class SessionHistory:
    """Append-only session log for a single session id."""

    def __init__(self, project_path: Path, session_id: str):
        self.project_path = project_path
        self.session_id = session_id
        self.path = _session_file(project_path, session_id)
        self._seen_ids: set[str] = set()
        self._load_seen_ids()

    def _load_seen_ids(self) -> None:
        if not self.path.exists():
            return
        try:
            with self.path.open("r", encoding="utf-8") as fh:
                for line in fh:
                    try:
                        data = json.loads(line)
                        payload = data.get("payload") or {}
                        msg_uuid = payload.get("uuid")
                        if isinstance(msg_uuid, str):
                            self._seen_ids.add(msg_uuid)
                    except Exception as exc:
                        logger.debug(f"Failed to parse session history line: {exc}")
                        continue
        except Exception as exc:
            logger.error(f"Failed to load seen IDs from session {self.session_id}: {exc}")
            return

    def append(self, message: ConversationMessage) -> None:
        """Persist a single message to the session log."""
        # Skip progress noise
        if getattr(message, "type", None) == "progress":
            return
        msg_uuid = getattr(message, "uuid", None)
        if isinstance(msg_uuid, str) and msg_uuid in self._seen_ids:
            return

        payload = message.model_dump(mode="json")
        entry = {
            "logged_at": _now_iso(),
            "payload": payload,
        }
        try:
            with self.path.open("a", encoding="utf-8") as fh:
                json.dump(entry, fh)
                fh.write("\n")
            if isinstance(msg_uuid, str):
                self._seen_ids.add(msg_uuid)
        except Exception as exc:
            # Avoid crashing the UI if logging fails
            logger.error(f"Failed to append message to session {self.session_id}: {exc}")
            return


def list_session_summaries(project_path: Path) -> List[SessionSummary]:
    """Return available sessions for the project ordered by last update desc."""
    directory = project_storage_dir(_sessions_root(), project_path)
    if not directory.exists():
        return []

    summaries: List[SessionSummary] = []
    for jsonl_path in directory.glob("*.jsonl"):
        try:
            with jsonl_path.open("r", encoding="utf-8") as fh:
                messages = [json.loads(line) for line in fh if line.strip()]
        except Exception as exc:
            logger.error(f"Failed to load session summary from {jsonl_path}: {exc}")
            continue

        payloads = [entry.get("payload") or {} for entry in messages]
        conversation_payloads = [
            payload for payload in payloads if payload.get("type") in ("user", "assistant")
        ]
        if not conversation_payloads:
            continue

        created_raw = messages[0].get("logged_at")
        updated_raw = messages[-1].get("logged_at")
        created_at = (
            datetime.fromisoformat(created_raw.replace("Z", "+00:00"))
            if isinstance(created_raw, str)
            else datetime.fromtimestamp(jsonl_path.stat().st_ctime)
        )
        updated_at = (
            datetime.fromisoformat(updated_raw.replace("Z", "+00:00"))
            if isinstance(updated_raw, str)
            else datetime.fromtimestamp(jsonl_path.stat().st_mtime)
        )
        first_prompt = ""
        for payload in conversation_payloads:
            first_prompt = _extract_prompt(payload)
            if first_prompt:
                break
        summaries.append(
            SessionSummary(
                session_id=jsonl_path.stem,
                path=jsonl_path,
                message_count=len(conversation_payloads),
                created_at=created_at,
                updated_at=updated_at,
                first_prompt=first_prompt or "(no prompt)",
            )
        )

    return sorted(summaries, key=lambda s: s.updated_at, reverse=True)


def load_session_messages(project_path: Path, session_id: str) -> List[ConversationMessage]:
    """Load messages for a stored session."""
    path = _session_file(project_path, session_id)
    if not path.exists():
        return []

    messages: List[ConversationMessage] = []
    try:
        with path.open("r", encoding="utf-8") as fh:
            for line in fh:
                if not line.strip():
                    continue
                try:
                    data = json.loads(line)
                    payload = data.get("payload") or {}
                    msg = _deserialize_message(payload)
                    if msg is not None and getattr(msg, "type", None) != "progress":
                        messages.append(msg)
                except Exception as exc:
                    logger.debug(f"Failed to deserialize message in session {session_id}: {exc}")
                    continue
    except Exception as exc:
        logger.error(f"Failed to load session messages for {session_id}: {exc}")
        return []

    return messages


__all__ = [
    "SessionHistory",
    "SessionSummary",
    "list_session_summaries",
    "load_session_messages",
]
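Usage note (not part of the package): a minimal sketch of listing and replaying stored sessions with the helpers above. Each line of a per-session `.jsonl` file is one JSON object of the form `{"logged_at": "<ISO timestamp>", "payload": {<serialized message>}}`.

# Sketch: enumerate this project's stored sessions, then replay the newest one.
# Uses only names defined in session_history.py.
from pathlib import Path

from ripperdoc.utils.session_history import list_session_summaries, load_session_messages

project = Path.cwd()

# Summaries come back newest-first (sorted by updated_at, descending).
summaries = list_session_summaries(project)
for summary in summaries:
    print(f"{summary.session_id}: {summary.message_count} msgs - {summary.first_prompt}")

if summaries:
    # Progress messages are filtered out on both append and load.
    for msg in load_session_messages(project, summaries[0].session_id):
        print(getattr(msg, "type", "?"))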
ripperdoc/utils/session_usage.py
ADDED

@@ -0,0 +1,110 @@
"""Session-level usage tracking for model calls."""

from __future__ import annotations

from copy import deepcopy
from dataclasses import dataclass, field
from typing import Any, Dict


@dataclass
class ModelUsage:
    """Aggregate token and duration stats for a single model."""

    input_tokens: int = 0
    output_tokens: int = 0
    cache_read_input_tokens: int = 0
    cache_creation_input_tokens: int = 0
    requests: int = 0
    duration_ms: float = 0.0


@dataclass
class SessionUsage:
    """In-memory snapshot of usage for the current session."""

    models: Dict[str, ModelUsage] = field(default_factory=dict)

    @property
    def total_input_tokens(self) -> int:
        return sum(usage.input_tokens for usage in self.models.values())

    @property
    def total_output_tokens(self) -> int:
        return sum(usage.output_tokens for usage in self.models.values())

    @property
    def total_cache_read_tokens(self) -> int:
        return sum(usage.cache_read_input_tokens for usage in self.models.values())

    @property
    def total_cache_creation_tokens(self) -> int:
        return sum(usage.cache_creation_input_tokens for usage in self.models.values())

    @property
    def total_requests(self) -> int:
        return sum(usage.requests for usage in self.models.values())

    @property
    def total_duration_ms(self) -> float:
        return sum(usage.duration_ms for usage in self.models.values())


_SESSION_USAGE = SessionUsage()


def _as_int(value: Any) -> int:
    """Best-effort integer conversion."""
    try:
        if value is None:
            return 0
        return int(value)
    except (TypeError, ValueError):
        return 0


def _model_key(model: str) -> str:
    """Normalize model names for use as dictionary keys."""
    return model or "unknown"


def record_usage(
    model: str,
    *,
    input_tokens: int = 0,
    output_tokens: int = 0,
    cache_read_input_tokens: int = 0,
    cache_creation_input_tokens: int = 0,
    duration_ms: float = 0.0,
) -> None:
    """Record a single model invocation."""
    global _SESSION_USAGE
    key = _model_key(model)
    usage = _SESSION_USAGE.models.setdefault(key, ModelUsage())

    usage.input_tokens += _as_int(input_tokens)
    usage.output_tokens += _as_int(output_tokens)
    usage.cache_read_input_tokens += _as_int(cache_read_input_tokens)
    usage.cache_creation_input_tokens += _as_int(cache_creation_input_tokens)
    usage.duration_ms += float(duration_ms) if duration_ms and duration_ms > 0 else 0.0
    usage.requests += 1


def get_session_usage() -> SessionUsage:
    """Return a copy of the current session usage."""
    return deepcopy(_SESSION_USAGE)


def reset_session_usage() -> None:
    """Clear all recorded usage (primarily for tests)."""
    global _SESSION_USAGE
    _SESSION_USAGE = SessionUsage()


__all__ = [
    "ModelUsage",
    "SessionUsage",
    "get_session_usage",
    "record_usage",
    "reset_session_usage",
]
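Usage note (not part of the package): a minimal sketch of the accounting flow. The model name is an illustrative placeholder; in practice the token counts would come from a provider response's usage block.

from ripperdoc.utils.session_usage import get_session_usage, record_usage, reset_session_usage

reset_session_usage()

# One record_usage call per model invocation; counters accumulate per model key.
record_usage("example-model", input_tokens=1200, output_tokens=350, duration_ms=900.0)
record_usage("example-model", input_tokens=800, output_tokens=120, duration_ms=400.0)

snapshot = get_session_usage()  # deep copy, so callers cannot mutate the live totals
assert snapshot.total_requests == 2
assert snapshot.total_input_tokens == 2000
assert snapshot.models["example-model"].duration_ms == 1300.0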
ripperdoc/utils/shell_token_utils.py
ADDED

@@ -0,0 +1,95 @@
"""Shell token parsing utilities."""

from __future__ import annotations

import re
import shlex
from typing import Iterable, List

# Operators and redirections that should not be treated as executable tokens.
SHELL_OPERATORS_WITH_REDIRECTION: set[str] = {
    "|",
    "||",
    "&&",
    ";",
    ">",
    ">>",
    "<",
    "<<",
    "2>",
    "&>",
    "2>&1",
    "|&",
}

_REDIRECTION_PATTERNS = (
    re.compile(r"^\d?>?&\d+$"),  # 2>&1, >&2, etc.
    re.compile(r"^\d?>/dev/null$"),  # 2>/dev/null, >/dev/null
    re.compile(r"^/dev/null$"),
)


def parse_shell_tokens(shell_command: str) -> List[str]:
    """Parse a shell command into tokens, preserving operators for inspection."""
    if not shell_command:
        return []

    lexer = shlex.shlex(shell_command, posix=True)
    lexer.whitespace_split = True
    lexer.commenters = ""

    try:
        return list(lexer)
    except ValueError:
        # Fall back to a coarse split to avoid hard failures.
        return shell_command.split()


def filter_valid_tokens(tokens: Iterable[str]) -> list[str]:
    """Remove shell control operators and redirection tokens."""
    return [token for token in tokens if token not in SHELL_OPERATORS_WITH_REDIRECTION]


def _is_redirection_token(token: str) -> bool:
    return any(pattern.match(token) for pattern in _REDIRECTION_PATTERNS)


def parse_and_clean_shell_tokens(raw_shell_string: str) -> List[str]:
    """Parse tokens and strip benign redirections to mirror reference cleaning."""
    tokens = parse_shell_tokens(raw_shell_string)
    if not tokens:
        return []

    cleaned: list[str] = []
    skip_next = False

    for idx, token in enumerate(tokens):
        if skip_next:
            skip_next = False
            continue

        # Handle explicit redirection operators that are followed by a target.
        if token in {">&", ">", "1>", "2>", ">>"}:
            if idx + 1 < len(tokens):
                next_token = tokens[idx + 1]
                if _is_redirection_token(next_token):
                    skip_next = True
                    continue
            cleaned.append(token)
            continue

        # Skip inlined redirection tokens to /dev/null or file descriptors.
        if _is_redirection_token(token):
            continue

        cleaned.append(token)

    return filter_valid_tokens(cleaned)


__all__ = [
    "parse_shell_tokens",
    "parse_and_clean_shell_tokens",
    "filter_valid_tokens",
    "SHELL_OPERATORS_WITH_REDIRECTION",
]
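Usage note (not part of the package): a small sketch of how the two parsing layers differ on a command containing a pipe and a redirection.

from ripperdoc.utils.shell_token_utils import parse_and_clean_shell_tokens, parse_shell_tokens

raw = "grep -r TODO src 2>/dev/null | wc -l"

# Raw tokens keep operators and redirections for inspection.
print(parse_shell_tokens(raw))
# -> ['grep', '-r', 'TODO', 'src', '2>/dev/null', '|', 'wc', '-l']

# Cleaned tokens drop the /dev/null redirection and the pipe operator,
# leaving only candidate executable words.
print(parse_and_clean_shell_tokens(raw))
# -> ['grep', '-r', 'TODO', 'src', 'wc', '-l']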
ripperdoc/utils/todo.py
ADDED
@@ -0,0 +1,199 @@
"""Todo storage and utilities for Ripperdoc.

This module provides simple, file-based todo management so tools can
persist and query tasks between turns. Todos are stored under the user's
home directory at `~/.ripperdoc/todos/<project>/todos.json`, where
`<project>` is a sanitized form of the project path.
"""

from __future__ import annotations

import json
import time
from pathlib import Path
from typing import List, Literal, Optional, Sequence, Tuple
from pydantic import BaseModel, ConfigDict, Field, ValidationError

from ripperdoc.utils.log import get_logger
from ripperdoc.utils.path_utils import project_storage_dir


logger = get_logger()

TodoStatus = Literal["pending", "in_progress", "completed"]
TodoPriority = Literal["high", "medium", "low"]


class TodoItem(BaseModel):
    """Represents a single todo entry."""

    id: str = Field(description="Unique identifier for the todo item")
    content: str = Field(description="Task description")
    status: TodoStatus = Field(
        default="pending", description="Current state: pending, in_progress, completed"
    )
    priority: TodoPriority = Field(default="medium", description="Priority: high|medium|low")
    created_at: Optional[float] = Field(default=None, description="Unix timestamp when created")
    updated_at: Optional[float] = Field(default=None, description="Unix timestamp when updated")
    previous_status: Optional[TodoStatus] = Field(
        default=None, description="Previous status, used for audits"
    )
    model_config = ConfigDict(extra="ignore")


MAX_TODOS = 200


def _storage_path(project_root: Optional[Path], ensure_dir: bool) -> Path:
    """Return the todo storage path, optionally ensuring the directory exists."""
    root = project_root or Path.cwd()
    base_dir = Path.home() / ".ripperdoc" / "todos"
    storage_dir = project_storage_dir(base_dir, root, ensure=ensure_dir)
    return storage_dir / "todos.json"


def validate_todos(
    todos: Sequence[TodoItem], max_items: int = MAX_TODOS
) -> Tuple[bool, str | None]:
    """Basic validation for a todo list."""
    if len(todos) > max_items:
        return False, f"Too many todos; limit is {max_items}."

    ids = [todo.id for todo in todos]
    duplicate_ids = {id_ for id_ in ids if ids.count(id_) > 1}
    if duplicate_ids:
        return False, f"Duplicate todo IDs found: {sorted(duplicate_ids)}"

    in_progress = [todo for todo in todos if todo.status == "in_progress"]
    if len(in_progress) > 1:
        return False, "Only one todo can be marked in_progress at a time."

    empty_contents = [todo.id for todo in todos if not todo.content.strip()]
    if empty_contents:
        return False, f"Todos require content. Empty content for IDs: {sorted(empty_contents)}"

    return True, None


def load_todos(project_root: Optional[Path] = None) -> List[TodoItem]:
    """Load todos from disk."""
    path = _storage_path(project_root, ensure_dir=False)
    if not path.exists():
        return []

    try:
        raw = json.loads(path.read_text())
    except Exception as exc:
        logger.error(f"Failed to load todos from {path}: {exc}")
        return []

    todos: List[TodoItem] = []
    for item in raw:
        try:
            todos.append(TodoItem(**item))
        except ValidationError as exc:
            logger.error(f"Failed to parse todo item: {exc}")
            continue

    # Preserve stored order; do not reorder based on status/priority.
    return todos


def save_todos(todos: Sequence[TodoItem], project_root: Optional[Path] = None) -> None:
    """Persist todos to disk."""
    path = _storage_path(project_root, ensure_dir=True)
    path.write_text(json.dumps([todo.model_dump() for todo in todos], indent=2))


def set_todos(
    todos: Sequence[TodoItem],
    project_root: Optional[Path] = None,
) -> List[TodoItem]:
    """Validate, normalize, and persist the provided todos."""
    ok, message = validate_todos(todos)
    if not ok:
        raise ValueError(message or "Invalid todos.")

    existing = {todo.id: todo for todo in load_todos(project_root)}
    now = time.time()

    normalized: List[TodoItem] = []
    for todo in todos:
        previous = existing.get(todo.id)
        normalized.append(
            todo.model_copy(
                update={
                    "created_at": previous.created_at if previous else todo.created_at or now,
                    "updated_at": now,
                    "previous_status": (
                        previous.status
                        if previous and previous.status != todo.status
                        else todo.previous_status
                    ),
                }
            )
        )

    # Keep the caller-provided order; do not resort.
    save_todos(normalized, project_root)
    return list(normalized)


def clear_todos(project_root: Optional[Path] = None) -> None:
    """Remove all todos."""
    save_todos([], project_root)


def get_next_actionable(todos: Sequence[TodoItem]) -> Optional[TodoItem]:
    """Return the next todo to work on (in_progress first, then pending)."""
    for status in ("in_progress", "pending"):
        for todo in todos:
            if todo.status == status:
                return todo
    return None


def summarize_todos(todos: Sequence[TodoItem]) -> dict:
    """Return simple statistics for a todo collection."""
    return {
        "total": len(todos),
        "by_status": {
            "pending": len([t for t in todos if t.status == "pending"]),
            "in_progress": len([t for t in todos if t.status == "in_progress"]),
            "completed": len([t for t in todos if t.status == "completed"]),
        },
        "by_priority": {
            "high": len([t for t in todos if t.priority == "high"]),
            "medium": len([t for t in todos if t.priority == "medium"]),
            "low": len([t for t in todos if t.priority == "low"]),
        },
    }


def format_todo_summary(todos: Sequence[TodoItem]) -> str:
    """Create a concise summary string for use in tool outputs."""
    stats = summarize_todos(todos)
    summary = (
        f"Todos updated (total {stats['total']}; "
        f"{stats['by_status']['pending']} pending, "
        f"{stats['by_status']['in_progress']} in progress, "
        f"{stats['by_status']['completed']} completed)."
    )

    next_item = get_next_actionable(todos)
    if next_item:
        summary += f" Next to tackle: {next_item.content} (id: {next_item.id}, status: {next_item.status})."
    elif stats["total"] == 0:
        summary += " No todos stored yet."

    return summary


def format_todo_lines(todos: Sequence[TodoItem]) -> List[str]:
    """Return human-readable todo lines."""
    status_marker = {
        "completed": "●",
        "in_progress": "◐",
        "pending": "○",
    }
    return [f"{status_marker.get(todo.status, '○')} {todo.content}" for todo in todos]