bareagent-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. bareagent/__init__.py +10 -0
  2. bareagent/concurrency/__init__.py +6 -0
  3. bareagent/concurrency/background.py +97 -0
  4. bareagent/concurrency/notification.py +61 -0
  5. bareagent/concurrency/scheduler.py +136 -0
  6. bareagent/config.toml +299 -0
  7. bareagent/core/__init__.py +1 -0
  8. bareagent/core/config_paths.py +49 -0
  9. bareagent/core/context.py +127 -0
  10. bareagent/core/fileutil.py +103 -0
  11. bareagent/core/goal.py +214 -0
  12. bareagent/core/handlers/__init__.py +1 -0
  13. bareagent/core/handlers/bash.py +79 -0
  14. bareagent/core/handlers/file_edit.py +47 -0
  15. bareagent/core/handlers/file_read.py +270 -0
  16. bareagent/core/handlers/file_write.py +34 -0
  17. bareagent/core/handlers/glob_search.py +30 -0
  18. bareagent/core/handlers/goal.py +60 -0
  19. bareagent/core/handlers/grep_search.py +52 -0
  20. bareagent/core/handlers/memory.py +71 -0
  21. bareagent/core/handlers/plan.py +106 -0
  22. bareagent/core/handlers/search_utils.py +77 -0
  23. bareagent/core/handlers/skill.py +87 -0
  24. bareagent/core/handlers/subagent_send.py +70 -0
  25. bareagent/core/handlers/web_fetch.py +126 -0
  26. bareagent/core/handlers/web_search.py +165 -0
  27. bareagent/core/handlers/workflow.py +190 -0
  28. bareagent/core/loop.py +535 -0
  29. bareagent/core/retry.py +131 -0
  30. bareagent/core/sandbox.py +27 -0
  31. bareagent/core/schema.py +21 -0
  32. bareagent/core/tools.py +779 -0
  33. bareagent/core/workflow.py +517 -0
  34. bareagent/core/workflow_registry.py +219 -0
  35. bareagent/debug/__init__.py +0 -0
  36. bareagent/debug/interaction_log.py +263 -0
  37. bareagent/debug/viewer.html +1750 -0
  38. bareagent/debug/web_viewer.py +157 -0
  39. bareagent/hooks/__init__.py +32 -0
  40. bareagent/hooks/config.py +118 -0
  41. bareagent/hooks/engine.py +197 -0
  42. bareagent/hooks/errors.py +14 -0
  43. bareagent/hooks/events.py +22 -0
  44. bareagent/lsp/__init__.py +63 -0
  45. bareagent/lsp/config.py +134 -0
  46. bareagent/lsp/coord.py +118 -0
  47. bareagent/lsp/diagnostics.py +240 -0
  48. bareagent/lsp/errors.py +24 -0
  49. bareagent/lsp/manager.py +866 -0
  50. bareagent/lsp/tools.py +629 -0
  51. bareagent/lsp/workspace_edit.py +305 -0
  52. bareagent/main.py +4205 -0
  53. bareagent/mcp/__init__.py +69 -0
  54. bareagent/mcp/_sse.py +69 -0
  55. bareagent/mcp/client.py +341 -0
  56. bareagent/mcp/config.py +169 -0
  57. bareagent/mcp/errors.py +32 -0
  58. bareagent/mcp/manager.py +318 -0
  59. bareagent/mcp/protocol.py +187 -0
  60. bareagent/mcp/registry.py +557 -0
  61. bareagent/mcp/transport/__init__.py +15 -0
  62. bareagent/mcp/transport/base.py +149 -0
  63. bareagent/mcp/transport/http_legacy.py +192 -0
  64. bareagent/mcp/transport/http_streamable.py +217 -0
  65. bareagent/mcp/transport/stdio.py +202 -0
  66. bareagent/memory/__init__.py +1 -0
  67. bareagent/memory/compact.py +203 -0
  68. bareagent/memory/conversation_io.py +226 -0
  69. bareagent/memory/embedding.py +194 -0
  70. bareagent/memory/persistent.py +515 -0
  71. bareagent/memory/token_counter.py +67 -0
  72. bareagent/memory/token_tracker.py +262 -0
  73. bareagent/memory/transcript.py +100 -0
  74. bareagent/permission/__init__.py +1 -0
  75. bareagent/permission/guard.py +329 -0
  76. bareagent/permission/rules.py +19 -0
  77. bareagent/planning/__init__.py +19 -0
  78. bareagent/planning/agent_types.py +169 -0
  79. bareagent/planning/skill_gen.py +141 -0
  80. bareagent/planning/skill_store.py +173 -0
  81. bareagent/planning/skills.py +146 -0
  82. bareagent/planning/subagent.py +355 -0
  83. bareagent/planning/subagent_registry.py +77 -0
  84. bareagent/planning/tasks.py +348 -0
  85. bareagent/planning/todo.py +153 -0
  86. bareagent/planning/worktree.py +122 -0
  87. bareagent/provider/__init__.py +1 -0
  88. bareagent/provider/anthropic.py +348 -0
  89. bareagent/provider/base.py +136 -0
  90. bareagent/provider/factory.py +130 -0
  91. bareagent/provider/openai.py +881 -0
  92. bareagent/provider/presets.py +72 -0
  93. bareagent/provider/setup.py +356 -0
  94. bareagent/skills/.gitkeep +1 -0
  95. bareagent/skills/code-review/SKILL.md +68 -0
  96. bareagent/skills/git/SKILL.md +68 -0
  97. bareagent/skills/test/SKILL.md +70 -0
  98. bareagent/team/__init__.py +17 -0
  99. bareagent/team/autonomous.py +193 -0
  100. bareagent/team/mailbox.py +239 -0
  101. bareagent/team/manager.py +155 -0
  102. bareagent/team/protocols.py +129 -0
  103. bareagent/tracing/__init__.py +12 -0
  104. bareagent/tracing/_api.py +92 -0
  105. bareagent/tracing/_proxy.py +60 -0
  106. bareagent/tracing/composite.py +115 -0
  107. bareagent/tracing/json_file.py +115 -0
  108. bareagent/tracing/langfuse.py +139 -0
  109. bareagent/tracing/otel.py +107 -0
  110. bareagent/tracing/setup.py +85 -0
  111. bareagent/ui/__init__.py +24 -0
  112. bareagent/ui/console.py +167 -0
  113. bareagent/ui/prompt.py +78 -0
  114. bareagent/ui/protocol.py +24 -0
  115. bareagent/ui/stream.py +66 -0
  116. bareagent/ui/theme.py +240 -0
  117. bareagent_cli-0.1.0.dist-info/METADATA +331 -0
  118. bareagent_cli-0.1.0.dist-info/RECORD +121 -0
  119. bareagent_cli-0.1.0.dist-info/WHEEL +4 -0
  120. bareagent_cli-0.1.0.dist-info/entry_points.txt +2 -0
  121. bareagent_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,262 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any
5
+
6
# Fallback USD prices per million tokens, as ``(input, output)``, for the
# project's default Claude model families. These are only a fallback: prices
# drift, so the user's [cost.prices] config is authoritative and both overrides
# and extends this table. Keys are matched as prefixes (startswith), so a dated
# model id such as "claude-opus-4-8-20251101" still resolves to its family.
#
# NOTE: prices are reference values as of 2026-06 and MAY CHANGE; override them
# via [cost.prices] in config.toml / config.local.toml to keep them accurate.
DEFAULT_PRICES: dict[str, tuple[float, float]] = {
    "claude-opus-4": (15.0, 75.0),
    "claude-sonnet-4": (3.0, 15.0),
    "claude-haiku-4": (1.0, 5.0),
}

# Prompt-cache price multipliers, relative to a model's base *input* price,
# as ``(read_multiplier, write_multiplier)``. Keys are model-family prefixes,
# longest-prefix matched.
#   - Anthropic: read 0.1x, write 1.25x (5m TTL; 1h's 2x is approximated as
#     1.25x — see PRD Out of Scope, estimate-only).
#   - OpenAI: cached input billed ~0.5x, no separate write premium.
#   - DeepSeek: cache hits ~0.1x, no separate write premium.
# Unknown models fall back to the Anthropic-like default (0.1, 1.25); cache
# tokens are only ever populated for providers covered here, so the fallback is
# a conservative estimate, never load-bearing.
DEFAULT_CACHE_MULTIPLIERS: dict[str, tuple[float, float]] = {
    "claude": (0.1, 1.25),
    # Every OpenAI family shares one multiplier pair.
    **dict.fromkeys(("gpt", "o1", "o3", "o4"), (0.5, 0.0)),
    "deepseek": (0.1, 0.0),
}
_FALLBACK_CACHE_MULTIPLIERS: tuple[float, float] = (0.1, 1.25)

# Prices above are quoted per *million* tokens; divide token counts by this
# to apply them.
_PER_MILLION = 1_000_000
42
+
43
+
44
def resolve_cache_multipliers(model: str) -> tuple[float, float]:
    """Return the ``(read_mult, write_mult)`` cache multipliers for *model*.

    The model id is longest-prefix matched against the keys of
    :data:`DEFAULT_CACHE_MULTIPLIERS`. When no family prefix matches, the
    module-level fallback pair is returned instead.
    """
    family = _longest_prefix_match(model, DEFAULT_CACHE_MULTIPLIERS.keys())
    return (
        _FALLBACK_CACHE_MULTIPLIERS
        if family is None
        else DEFAULT_CACHE_MULTIPLIERS[family]
    )
54
+
55
+
56
def resolve_price(
    model: str,
    prices: dict[str, dict[str, float]] | None,
) -> tuple[float, float] | None:
    """Resolve (input, output) price per million tokens for *model*.

    Lookup order:
      1. User-configured ``prices`` — every key that is a prefix of *model* is
         tried from longest to shortest, and the first entry that coerces to a
         valid price wins. An exact key is the longest possible prefix, so an
         exact match still takes precedence.
      2. Built-in :data:`DEFAULT_PRICES` — longest prefix match.

    A malformed user entry is skipped rather than ending the user-config
    lookup, so a broken specific override cannot hide a valid, shorter
    family-level override behind the built-in table.

    Args:
        model: Model identifier to price.
        prices: Optional user price table mapping a model id or prefix to a
            ``{"input": ..., "output": ...}`` dict.

    Returns:
        The ``(input, output)`` price pair, or ``None`` when no price is known
        (the caller shows token counts only, never a fabricated cost).
    """
    if prices:
        matching_keys = sorted(
            (key for key in prices if model.startswith(key)),
            key=len,
            reverse=True,
        )
        for key in matching_keys:
            resolved = _coerce_price_entry(prices[key])
            if resolved is not None:
                return resolved

    builtin_prefix = _longest_prefix_match(model, DEFAULT_PRICES.keys())
    if builtin_prefix is not None:
        return DEFAULT_PRICES[builtin_prefix]
    return None
85
+
86
+
87
def _longest_prefix_match(model: str, keys: Any) -> str | None:
    """Pick the longest entry of *keys* that *model* starts with.

    Returns ``None`` when no key is a prefix of *model* (including when *keys*
    is empty).
    """
    candidates = [key for key in keys if model.startswith(key)]
    if not candidates:
        return None
    # max() keeps the first of several equally long candidates, matching a
    # strictly-greater comparison in a manual scan.
    return max(candidates, key=len)
94
+
95
+
96
def _coerce_price_entry(entry: dict[str, float]) -> tuple[float, float] | None:
    """Coerce a ``{input, output}`` config dict into an (input, output) tuple.

    Args:
        entry: A user-config price entry; expected to be a dict with numeric
            (or numeric-string) ``"input"`` and ``"output"`` values.

    Returns:
        ``(input, output)`` as floats, or ``None`` when *entry* is not a dict,
        lacks a key, holds a value ``float()`` cannot convert, or holds a
        price that is NaN, infinite or negative. Such values would otherwise
        turn every cost estimate built from them into nonsense.
    """
    import math

    if not isinstance(entry, dict):
        return None
    try:
        input_price = float(entry["input"])
        output_price = float(entry["output"])
    except (KeyError, TypeError, ValueError):
        return None
    for price in (input_price, output_price):
        if not math.isfinite(price) or price < 0:
            return None
    return input_price, output_price
104
+
105
+
106
+ @dataclass(slots=True)
107
+ class _ModelUsage:
108
+ input_tokens: int = 0
109
+ output_tokens: int = 0
110
+ cache_read_tokens: int = 0
111
+ cache_write_tokens: int = 0
112
+ call_count: int = 0
113
+
114
+
115
+ @dataclass(slots=True)
116
+ class TokenTracker:
117
+ """Process-level accumulator for LLM token usage during a session.
118
+
119
+ Records ``input_tokens`` / ``output_tokens`` from each :class:`LLMResponse`
120
+ plus a per-model breakdown. Pure logic with no I/O so it is unit-testable in
121
+ isolation. Reset on session boundaries (``/new`` / ``/clear`` / ``/resume``)
122
+ but not on in-session compaction (``/compact``).
123
+ """
124
+
125
+ total_input: int = 0
126
+ total_output: int = 0
127
+ total_cache_read: int = 0
128
+ total_cache_write: int = 0
129
+ call_count: int = 0
130
+ per_model: dict[str, _ModelUsage] = field(default_factory=dict)
131
+
132
+ @property
133
+ def total_tokens(self) -> int:
134
+ return self.total_input + self.total_output + self.total_cache_read + self.total_cache_write
135
+
136
+ def record(self, response: Any, model: str) -> None:
137
+ """Accumulate one LLM response's token usage under *model*.
138
+
139
+ Reads only the normalized usage fields off the response so it never
140
+ couples to a specific provider's wire shape. ``input_tokens`` is the
141
+ full-price remainder; cache read/write are additive and non-overlapping
142
+ (see ``LLMResponse``), so summing all four gives the true prompt size.
143
+ """
144
+ input_tokens = int(getattr(response, "input_tokens", 0) or 0)
145
+ output_tokens = int(getattr(response, "output_tokens", 0) or 0)
146
+ cache_read = int(getattr(response, "cache_read_input_tokens", 0) or 0)
147
+ cache_write = int(getattr(response, "cache_creation_input_tokens", 0) or 0)
148
+
149
+ self.total_input += input_tokens
150
+ self.total_output += output_tokens
151
+ self.total_cache_read += cache_read
152
+ self.total_cache_write += cache_write
153
+ self.call_count += 1
154
+
155
+ usage = self.per_model.get(model)
156
+ if usage is None:
157
+ usage = _ModelUsage()
158
+ self.per_model[model] = usage
159
+ usage.input_tokens += input_tokens
160
+ usage.output_tokens += output_tokens
161
+ usage.cache_read_tokens += cache_read
162
+ usage.cache_write_tokens += cache_write
163
+ usage.call_count += 1
164
+
165
+ def reset(self) -> None:
166
+ """Clear all accumulated usage (session boundary)."""
167
+ self.total_input = 0
168
+ self.total_output = 0
169
+ self.total_cache_read = 0
170
+ self.total_cache_write = 0
171
+ self.call_count = 0
172
+ self.per_model.clear()
173
+
174
+ def estimate_cost(
175
+ self,
176
+ prices: dict[str, dict[str, float]] | None,
177
+ ) -> float | None:
178
+ """Estimate total cost in USD across all priced models.
179
+
180
+ Models without a known price are skipped (their tokens still count, but
181
+ contribute no dollars). Returns ``None`` only when *no* recorded model
182
+ has a price, so the caller can suppress the ``$`` line entirely rather
183
+ than print ``$0.00``.
184
+ """
185
+ total = 0.0
186
+ any_priced = False
187
+ for model, usage in self.per_model.items():
188
+ price = resolve_price(model, prices)
189
+ if price is None:
190
+ continue
191
+ any_priced = True
192
+ input_price, output_price = price
193
+ read_mult, write_mult = resolve_cache_multipliers(model)
194
+ total += usage.input_tokens / _PER_MILLION * input_price
195
+ total += usage.output_tokens / _PER_MILLION * output_price
196
+ total += usage.cache_read_tokens / _PER_MILLION * input_price * read_mult
197
+ total += usage.cache_write_tokens / _PER_MILLION * input_price * write_mult
198
+ return total if any_priced else None
199
+
200
+ def summary(self, prices: dict[str, dict[str, float]] | None) -> str:
201
+ """Render a human-readable usage summary for the ``/cost`` command.
202
+
203
+ Always shows token counts (total input/output/total + call_count +
204
+ per-model breakdown). Priced models show their ``$`` estimate inline;
205
+ unpriced models are tagged ``(no price)``. A total cost line is added
206
+ only when at least one model is priced.
207
+ """
208
+ lines = [
209
+ "Token usage (this session):",
210
+ f" Input: {self.total_input:,} tokens",
211
+ f" Output: {self.total_output:,} tokens",
212
+ ]
213
+ # Only surface the cache line when caching actually happened, so
214
+ # non-cached sessions keep the original compact output.
215
+ if self.total_cache_read or self.total_cache_write:
216
+ lines.append(
217
+ f" Cache: {self.total_cache_read:,} read / "
218
+ f"{self.total_cache_write:,} write tokens"
219
+ )
220
+ lines.extend(
221
+ [
222
+ f" Total: {self.total_tokens:,} tokens",
223
+ f" Calls: {self.call_count}",
224
+ ]
225
+ )
226
+
227
+ if self.per_model:
228
+ lines.append(" By model:")
229
+ for model in sorted(self.per_model):
230
+ usage = self.per_model[model]
231
+ price = resolve_price(model, prices)
232
+ if price is None:
233
+ cost_label = " (no price)"
234
+ else:
235
+ input_price, output_price = price
236
+ read_mult, write_mult = resolve_cache_multipliers(model)
237
+ model_cost = (
238
+ usage.input_tokens / _PER_MILLION * input_price
239
+ + usage.output_tokens / _PER_MILLION * output_price
240
+ + usage.cache_read_tokens / _PER_MILLION * input_price * read_mult
241
+ + usage.cache_write_tokens / _PER_MILLION * input_price * write_mult
242
+ )
243
+ cost_label = f" — ${model_cost:.4f}"
244
+ cache_label = ""
245
+ if usage.cache_read_tokens or usage.cache_write_tokens:
246
+ cache_label = (
247
+ f", {usage.cache_read_tokens:,} cache-read / "
248
+ f"{usage.cache_write_tokens:,} cache-write"
249
+ )
250
+ lines.append(
251
+ f" {model}: "
252
+ f"{usage.input_tokens:,} in / {usage.output_tokens:,} out"
253
+ f"{cache_label} "
254
+ f"({usage.call_count} calls){cost_label}"
255
+ )
256
+
257
+ total_cost = self.estimate_cost(prices)
258
+ if total_cost is not None:
259
+ lines.append(f" Estimated cost: ${total_cost:.4f}")
260
+ lines.append(" (prices are estimates; override via [cost.prices] in config)")
261
+
262
+ return "\n".join(lines)
@@ -0,0 +1,100 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
# strftime pattern stamped into every newly saved transcript filename
# (microsecond resolution, with "-" in place of ":" and ".").
_SAVE_TIMESTAMP_FORMAT = "%Y-%m-%dT%H-%M-%S-%f"

# Patterns tried, in order, when parsing a transcript filename: the save
# format first, then the same layout without the microsecond component.
_TIMESTAMP_FORMATS = (_SAVE_TIMESTAMP_FORMAT, "%Y-%m-%dT%H-%M-%S")
14
+
15
+
16
+ @dataclass(slots=True)
17
+ class _TranscriptEntry:
18
+ session_id: str
19
+ timestamp: datetime
20
+ path: Path
21
+
22
+
23
class TranscriptManager:
    """Store conversation transcripts as ``<session_id>_<timestamp>.jsonl`` files.

    Every save writes a new file; loading a session reads the file with the
    newest timestamp in its name.
    """

    def __init__(self, transcript_dir: str | Path = ".transcripts") -> None:
        """Remember *transcript_dir* and create it (with parents) if missing."""
        self.transcript_dir = Path(transcript_dir)
        self.transcript_dir.mkdir(parents=True, exist_ok=True)

    def save(self, messages: list[dict[str, Any]], session_id: str) -> Path:
        """Write *messages* as one JSON object per line and return the new path."""
        stamp = datetime.now().strftime(_SAVE_TIMESTAMP_FORMAT)
        target = self.transcript_dir / f"{session_id}_{stamp}.jsonl"
        with target.open("w", encoding="utf-8") as handle:
            handle.writelines(
                json.dumps(message, ensure_ascii=False) + "\n"
                for message in messages
            )
        return target

    def load(self, session_id: str) -> list[dict[str, Any]]:
        """Return the messages of the newest transcript saved for *session_id*.

        Blank lines are skipped.

        Raises:
            FileNotFoundError: No transcript exists for *session_id*.
        """
        newest = self._get_session_entry(session_id)
        records: list[dict[str, Any]] = []
        with newest.path.open("r", encoding="utf-8") as handle:
            for raw_line in handle:
                if raw_line.strip():
                    records.append(json.loads(raw_line))
        return records

    def list_sessions(self) -> list[str]:
        """Return every known session id, most recently saved first."""
        newest_by_session: dict[str, datetime] = {}
        for entry in self._iter_entries():
            known = newest_by_session.get(entry.session_id, datetime.min)
            newest_by_session[entry.session_id] = (
                entry.timestamp if entry.timestamp >= known else known
            )
        return sorted(
            newest_by_session,
            key=newest_by_session.__getitem__,
            reverse=True,
        )

    def get_latest_session(self) -> str | None:
        """Return the session id of the newest transcript, or ``None`` if none exist."""
        newest = max(
            self._iter_entries(),
            key=lambda entry: entry.timestamp,
            default=None,
        )
        return None if newest is None else newest.session_id

    def resume(self, session_id: str | None = None) -> list[dict[str, Any]]:
        """Load *session_id*, or the most recent session when it is falsy.

        Raises:
            FileNotFoundError: No transcript is available to resume.
        """
        target_session = session_id or self.get_latest_session()
        if target_session is None:
            raise FileNotFoundError("No saved transcripts found.")
        return self.load(target_session)

    def _get_session_entry(self, session_id: str) -> _TranscriptEntry:
        """Return the newest entry for *session_id* or raise ``FileNotFoundError``."""
        matching = [
            entry for entry in self._iter_entries() if entry.session_id == session_id
        ]
        if matching:
            return max(matching, key=lambda entry: entry.timestamp)
        raise FileNotFoundError(f"No transcript found for session: {session_id}")

    def _iter_entries(self) -> list[_TranscriptEntry]:
        """Parse every ``*.jsonl`` file in the directory, dropping unparseable names."""
        return [
            entry
            for path in self.transcript_dir.glob("*.jsonl")
            if (entry := self._parse_entry(path)) is not None
        ]

    def _parse_entry(self, path: Path) -> _TranscriptEntry | None:
        """Split a filename stem into session id and timestamp.

        Returns ``None`` when the stem has no underscore or the text after the
        last underscore matches none of the known timestamp formats.
        """
        session_id, separator, raw_timestamp = path.stem.rpartition("_")
        if not separator:
            return None
        for fmt in _TIMESTAMP_FORMATS:
            try:
                parsed = datetime.strptime(raw_timestamp, fmt)
            except ValueError:
                continue
            return _TranscriptEntry(session_id=session_id, timestamp=parsed, path=path)
        return None
@@ -0,0 +1 @@
1
+ """Permission control modules for BareAgent."""
@@ -0,0 +1,329 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ import sys
6
+ from collections.abc import Callable
7
+ from enum import Enum
8
+ from typing import TYPE_CHECKING, Any
9
+
10
+ if TYPE_CHECKING:
11
+ from bareagent.planning.agent_types import AgentType
12
+
13
+
14
class PermissionMode(Enum):
    """Permission policy selected for a :class:`PermissionGuard`."""

    # Interactive baseline.
    DEFAULT = "default"
    # More permissive than DEFAULT for calls with no matching rule or pattern.
    AUTO = "auto"
    # Planning mode; ask_user refuses with a "read-only" message.
    PLAN = "plan"
    # requires_confirm always answers False.
    BYPASS = "bypass"
19
+
20
+
21
# Regex alternation of shell executable names, spliced into the guard's
# shell-wrapper and pipe-to-shell patterns.
_SHELLS = "|".join(("bash", "sh", "zsh", "dash", "ksh", "fish"))

# Name prefix that marks a tool as coming from an MCP server.
_MCP_TOOL_PREFIX = "mcp__"

# Preview limit for MCP ask prompts. MCP args are JSON, not shell text, and
# servers can produce arbitrarily large strings (file blobs, long URLs). Cap
# top-level string values so a single field can't flood the terminal.
_MCP_PREVIEW_FIELD_LIMIT = 256
28
+
29
+
30
def _is_mcp_tool(tool_name: str) -> bool:
    """Tell whether ``tool_name`` lives in the ``mcp__<server>__<tool>`` namespace.

    Only the leading ``mcp__`` marker is checked; the rest of the name is not
    validated.
    """
    marker_length = len(_MCP_TOOL_PREFIX)
    return tool_name[:marker_length] == _MCP_TOOL_PREFIX
33
+
34
+
35
class PermissionGuard:
    """Decide whether a tool call may run without asking the user.

    ``requires_confirm`` answers "does this call need approval?" from the
    permission mode, the configured allow/deny prefix rules and, for ``bash``,
    regex heuristics over the command text. ``ask_user`` then obtains (or
    refuses) that approval. The regexes are heuristics, not a sandbox.
    """

    SAFE_TOOLS = {
        "read_file",
        "glob",
        "grep",
        "todo_read",
        "todo_write",
        "load_skill",
        "task_list",
        "task_get",
        "team_list",
        "web_fetch",
        "web_search",
        # Memory is sandboxed to its own directory (never user code) and is
        # agent bookkeeping; prompting on every recall/save would be noise.
        # Read-only isolation for sub-agents is handled at the AgentType layer
        # (memory_writable), not here.
        "memory",
        # skill_create writes only to the generated-skills pending sandbox and
        # is exposed only inside the isolated reflection call (never the main
        # tool set / sub-agents), so prompting would be noise.
        "skill_create",
        # goal_verdict only records the evaluator's judgement into an in-memory
        # sink (no workspace side effects) and is exposed only inside the
        # isolated goal-evaluator call (never the main tool set / sub-agents),
        # so prompting would be noise.
        "goal_verdict",
        # exit_plan_mode is the *only* way out of PLAN mode; it MUST be allowed
        # while in PLAN (a non-SAFE tool is blocked there). Its own action is the
        # approval prompt, so a separate permission confirm would be redundant.
        # It is a main-loop-only tool (never in the global set / sub-agents).
        "exit_plan_mode",
    }
    AUTO_SAFE_PATTERNS = [
        re.compile(r"^(ls|cat|head|tail|wc|echo|pwd|date|which|type)\b"),
        re.compile(r"^git\s+(status|log|diff|branch|show)\b"),
        re.compile(r"^(pytest|python\s+-m\s+pytest|ruff|mypy)\b"),
        re.compile(r"^npm\s+(test|run\s+lint|run\s+test)\b"),
    ]
    # Shell syntax that lets a command line run or write more than the program
    # it starts with: separators, pipes, redirects, backticks, ``$(...)`` and
    # embedded newlines. AUTO_SAFE_PATTERNS only inspect the first word, so a
    # command containing any of these is never treated as auto-safe.
    _SHELL_CONTROL_PATTERN = re.compile(r"[;&|<>`\n]|\$\(")
    DANGEROUS_PATTERNS = [
        # recursive rm in any flag spelling: -r, -R, -rf, -fr, "-f -r",
        # --recursive (optionally after other dash-options)
        re.compile(
            r"(^|\s)rm\s+(?:-\S+\s+)*(?:-[A-Za-z]*[rR][A-Za-z]*|--recursive)\b"
        ),
        # force push, wherever the flag sits among the push arguments
        re.compile(r"git\s+push\b[^;&|]*\s(?:--force|-f)\b"),
        re.compile(r"git\s+reset\s+--hard\b"),
        re.compile(r"DROP\s+TABLE\b", re.IGNORECASE),
        re.compile(r"DELETE\s+FROM\b", re.IGNORECASE),
        # shell wrapper bypass
        re.compile(rf"(^|\s)({_SHELLS})\s+-c\b"),
        # absolute-path rm bypass
        re.compile(r"(^|\s)/(?:usr/)?bin/rm\b"),
        # env prefix bypass
        re.compile(r"(^|\s)env\s+"),
        # pipe-to-shell execution
        re.compile(rf"curl\b.*\|\s*({_SHELLS})\b"),
        re.compile(rf"wget\b.*\|\s*({_SHELLS})\b"),
        # destructive system commands
        re.compile(r"(^|\s)chmod\s+777\b"),
        re.compile(r"(^|\s)mkfs\b"),
        re.compile(r"(^|\s)dd\s+if="),
        re.compile(r"find\b.*-delete\b"),
    ]

    def __init__(
        self,
        mode: PermissionMode = PermissionMode.DEFAULT,
        *,
        fail_closed: bool = False,
        ask_user_fn: Callable[[Any], bool] | None = None,
    ) -> None:
        """Create a guard.

        Args:
            mode: Permission policy to apply.
            fail_closed: When true, ``ask_user`` denies without prompting.
            ask_user_fn: Optional callback that replaces the built-in
                terminal prompt in ``ask_user``.
        """
        self.mode = mode
        self.allow_rules: list[str] = []
        self.deny_rules: list[str] = []
        self.fail_closed = fail_closed
        self._ask_user_fn = ask_user_fn

    def requires_confirm(self, tool_name: str, tool_input: dict[str, Any]) -> bool:
        """Return True when the call must be approved before it runs.

        Precedence, highest first:
          * BYPASS never asks.
          * MCP tools: PLAN always asks; otherwise deny rules, then allow
            rules, then the mode (AUTO runs, DEFAULT asks).
          * PLAN: every non-SAFE tool asks, and SAFE tools ask only when a
            deny rule matches.
          * ``bash``: deny rules, dangerous patterns, allow rules, auto-safe
            commands, then the mode (DEFAULT asks, AUTO runs).
          * Other tools: deny rules, SAFE tools, the always-allowed edit/task
            tools, allow rules, write tools (ask in DEFAULT only); anything
            else asks.
        """
        if self.mode == PermissionMode.BYPASS:
            return False
        normalized_tool = tool_name.strip().lower()
        rule_subject = permission_rule_subject(normalized_tool, tool_input)
        # MCP tools carry JSON args (not shell text), so DANGEROUS_PATTERNS
        # are not applicable. Branch early on mode but still honour the
        # generic allow/deny prefix rules.
        if _is_mcp_tool(normalized_tool):
            # PLAN mode rejects every MCP tool by policy — MCP servers have
            # unknown side effects and are not in SAFE_TOOLS. This check runs
            # before allow_rules so an allowlist in config.toml cannot punch
            # holes through PLAN.
            if self.mode == PermissionMode.PLAN:
                return True
            if rule_subject and self._match_rules(
                self.deny_rules,
                normalized_tool,
                rule_subject,
            ):
                return True
            if rule_subject and self._match_rules(
                self.allow_rules,
                normalized_tool,
                rule_subject,
            ):
                return False
            if self.mode == PermissionMode.AUTO:
                return False
            # DEFAULT: always ask for MCP tools.
            return True
        if self.mode == PermissionMode.PLAN:
            if normalized_tool not in self.SAFE_TOOLS:
                return True
            # Deny rules stay in force in PLAN, the most restrictive mode;
            # without this a SAFE tool that is denied in DEFAULT/AUTO would
            # run unchecked here. ask_user() refuses every prompt in PLAN, so
            # returning True blocks the call.
            return bool(rule_subject) and self._match_rules(
                self.deny_rules,
                normalized_tool,
                rule_subject,
            )
        if normalized_tool == "bash":
            cmd = rule_subject or ""
            if self._match_rules(self.deny_rules, normalized_tool, cmd):
                return True
            if any(pattern.search(cmd) for pattern in self.DANGEROUS_PATTERNS):
                return True
            if self._match_rules(self.allow_rules, normalized_tool, cmd):
                return False
            if self._is_auto_safe_command(cmd):
                return False
            if self.mode == PermissionMode.DEFAULT:
                return True
            # AUTO mode: not matching any dangerous pattern, allow
            return False

        if rule_subject and self._match_rules(
            self.deny_rules,
            normalized_tool,
            rule_subject,
        ):
            return True
        if normalized_tool in self.SAFE_TOOLS:
            return False
        if normalized_tool in {"edit_file", "task_create", "task_update"}:
            return False
        if rule_subject and self._match_rules(
            self.allow_rules,
            normalized_tool,
            rule_subject,
        ):
            return False
        if normalized_tool in {"write_file", "semantic_rename"}:
            # Write tools: confirm in DEFAULT, auto-approve in AUTO. PLAN and
            # BYPASS were both decided earlier in this method.
            return self.mode == PermissionMode.DEFAULT
        return True

    def _is_auto_safe_command(self, cmd: str) -> bool:
        """Return True when *cmd* is a single read-only/test command.

        The command must start with one of ``AUTO_SAFE_PATTERNS`` and contain
        no shell control syntax. The patterns only anchor the first word, so
        without the second check ``echo ok; <anything>`` would inherit the
        safety of ``echo``.
        """
        if self._SHELL_CONTROL_PATTERN.search(cmd):
            return False
        return any(pattern.search(cmd) for pattern in self.AUTO_SAFE_PATTERNS)

    def is_dangerous(self, tool_name: str, tool_input: dict[str, Any]) -> bool:
        """Return True if ``tool_name`` + ``tool_input`` match a known dangerous shell pattern.

        DANGEROUS_PATTERNS encode shell-text heuristics (``rm -rf``,
        ``git push --force``, ``DROP TABLE``...). They are intentionally
        skipped for MCP tools, whose ``tool_input`` is JSON rather than a
        shell command — applying shell regexes against JSON would produce
        false positives without catching anything real. Only ``bash`` calls
        are inspected; every other tool returns False.
        """
        normalized_tool = tool_name.strip().lower()
        if _is_mcp_tool(normalized_tool):
            return False
        if normalized_tool == "bash":
            cmd = str(tool_input.get("command", ""))
            return any(pattern.search(cmd) for pattern in self.DANGEROUS_PATTERNS)
        return False

    def format_preview(self, tool_name: str, tool_input: dict[str, Any]) -> str:
        """Return a human-readable JSON preview of ``tool_input`` for ask prompts.

        Top-level string values longer than ``_MCP_PREVIEW_FIELD_LIMIT`` are
        truncated with a ``... [truncated, N chars]`` suffix so a single huge
        argument (file blob, long URL) cannot drown the terminal. Nested
        structures are not recursively truncated — v1 keeps the rule simple.
        ``tool_name`` is accepted for interface symmetry and not used.
        """
        if not isinstance(tool_input, dict) or not tool_input:
            return json.dumps(tool_input, ensure_ascii=False, indent=2)
        prepared: dict[str, Any] = {}
        for key, value in tool_input.items():
            if isinstance(value, str) and len(value) > _MCP_PREVIEW_FIELD_LIMIT:
                prepared[key] = (
                    value[:_MCP_PREVIEW_FIELD_LIMIT] + f"... [truncated, {len(value)} chars]"
                )
            else:
                prepared[key] = value
        return json.dumps(prepared, ensure_ascii=False, indent=2, default=str)

    def ask_user(self, call: Any) -> bool:
        """Ask whether *call* may run; return True only on explicit approval.

        Denies without prompting when the guard is fail-closed, in PLAN mode,
        or when stdin is not a TTY and no ``ask_user_fn`` was supplied.
        *call* must expose ``name`` and ``input``.
        """
        if self.fail_closed:
            return False
        if self.mode == PermissionMode.PLAN:
            print(f"Plan mode: {call.name} blocked (read-only)")
            return False
        if self._ask_user_fn is not None:
            return self._ask_user_fn(call)
        if not sys.stdin.isatty():
            print(f"Non-interactive environment: {call.name} denied")
            return False
        # default=str keeps the prompt from raising on inputs json cannot
        # encode, matching format_preview().
        preview = json.dumps(call.input, ensure_ascii=False, default=str)[:200]
        print(f"{call.name}: {preview}")
        try:
            return input("Allow? [y/N] ").strip().lower() == "y"
        except EOFError:
            return False

    def _match_rules(self, rules: list[str], tool_name: str, cmd: str) -> bool:
        """Return True if any rule for *tool_name* has a prefix that *cmd* starts with.

        Rules that do not parse are ignored.
        """
        normalized_tool = tool_name.strip().lower()
        subject = cmd.strip()
        for rule in rules:
            parsed = _parse_prefix_rule(rule)
            if parsed is None:
                continue
            rule_tool, prefix = parsed
            if rule_tool != normalized_tool:
                continue
            if subject.startswith(prefix):
                return True
        return False

    def clone(
        self, *, mode: PermissionMode | None = None, fail_closed: bool | None = None
    ) -> PermissionGuard:
        """Create a copy of this guard with optional overrides.

        The rule lists are copied, so later edits to one guard do not affect
        the other; the ``ask_user_fn`` callback is shared.
        """
        child = PermissionGuard(
            mode=mode if mode is not None else self.mode,
            fail_closed=fail_closed if fail_closed is not None else self.fail_closed,
            ask_user_fn=self._ask_user_fn,
        )
        child.allow_rules = list(self.allow_rules)
        child.deny_rules = list(self.deny_rules)
        return child

    def for_subagent(
        self,
        agent_type: AgentType,
        *,
        background: bool = False,
    ) -> PermissionGuard:
        """Clone the guard for child-agent execution.

        The child uses ``agent_type.permission_mode`` when set, else this
        guard's mode. It is fail-closed when the parent is, when it runs in the
        background, or when its mode is PLAN.
        """
        resolved_mode = (
            agent_type.permission_mode if agent_type.permission_mode is not None else self.mode
        )
        return self.clone(
            mode=resolved_mode,
            fail_closed=self.fail_closed or background or resolved_mode == PermissionMode.PLAN,
        )
274
+
275
+
276
+ def _parse_prefix_rule(rule: str) -> tuple[str, str] | None:
277
+ match = re.fullmatch(
278
+ r"\s*([A-Za-z_][A-Za-z0-9_]*)\((prefix|prefix_json):([\s\S]+)\)\s*",
279
+ rule,
280
+ )
281
+ if match is None:
282
+ return None
283
+ tool_name = match.group(1).strip().lower()
284
+ rule_kind = match.group(2)
285
+ raw_prefix = match.group(3)
286
+ if rule_kind == "prefix_json":
287
+ try:
288
+ parsed_prefix = json.loads(raw_prefix)
289
+ except json.JSONDecodeError:
290
+ return None
291
+ if not isinstance(parsed_prefix, str):
292
+ return None
293
+ return tool_name, parsed_prefix
294
+ prefix = raw_prefix.rstrip("*").strip()
295
+ return tool_name, prefix
296
+
297
+
298
def permission_rule_subject(tool_name: str, tool_input: dict[str, Any]) -> str | None:
    """Pick the text that allow/deny prefix rules are matched against.

    * ``bash``: the stripped ``command``.
    * Otherwise the first non-blank string among ``file_path``, ``path``,
      ``name``, ``to_agent``, ``task_id`` and ``skill_name``.
    * Otherwise a non-blank ``task`` value, converted with ``str``.
    * Otherwise the whole input serialized as key-sorted JSON.

    Returns ``None`` when nothing usable is found.
    """
    if tool_name.strip().lower() == "bash":
        return str(tool_input.get("command", "")).strip() or None

    for field_name in ("file_path", "path", "name", "to_agent", "task_id", "skill_name"):
        candidate = tool_input.get(field_name)
        if isinstance(candidate, str) and candidate.strip():
            return candidate.strip()

    if "task" in tool_input:
        task_text = str(tool_input.get("task", "")).strip()
        if task_text:
            return task_text

    if not tool_input:
        return None

    try:
        serialized = json.dumps(
            tool_input,
            ensure_ascii=False,
            sort_keys=True,
            default=str,
        )
    except (TypeError, ValueError):
        # Fall back to the plain repr when JSON encoding fails.
        serialized = str(tool_input).strip()
    return serialized or None