bareagent-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bareagent/__init__.py +10 -0
- bareagent/concurrency/__init__.py +6 -0
- bareagent/concurrency/background.py +97 -0
- bareagent/concurrency/notification.py +61 -0
- bareagent/concurrency/scheduler.py +136 -0
- bareagent/config.toml +299 -0
- bareagent/core/__init__.py +1 -0
- bareagent/core/config_paths.py +49 -0
- bareagent/core/context.py +127 -0
- bareagent/core/fileutil.py +103 -0
- bareagent/core/goal.py +214 -0
- bareagent/core/handlers/__init__.py +1 -0
- bareagent/core/handlers/bash.py +79 -0
- bareagent/core/handlers/file_edit.py +47 -0
- bareagent/core/handlers/file_read.py +270 -0
- bareagent/core/handlers/file_write.py +34 -0
- bareagent/core/handlers/glob_search.py +30 -0
- bareagent/core/handlers/goal.py +60 -0
- bareagent/core/handlers/grep_search.py +52 -0
- bareagent/core/handlers/memory.py +71 -0
- bareagent/core/handlers/plan.py +106 -0
- bareagent/core/handlers/search_utils.py +77 -0
- bareagent/core/handlers/skill.py +87 -0
- bareagent/core/handlers/subagent_send.py +70 -0
- bareagent/core/handlers/web_fetch.py +126 -0
- bareagent/core/handlers/web_search.py +165 -0
- bareagent/core/handlers/workflow.py +190 -0
- bareagent/core/loop.py +535 -0
- bareagent/core/retry.py +131 -0
- bareagent/core/sandbox.py +27 -0
- bareagent/core/schema.py +21 -0
- bareagent/core/tools.py +779 -0
- bareagent/core/workflow.py +517 -0
- bareagent/core/workflow_registry.py +219 -0
- bareagent/debug/__init__.py +0 -0
- bareagent/debug/interaction_log.py +263 -0
- bareagent/debug/viewer.html +1750 -0
- bareagent/debug/web_viewer.py +157 -0
- bareagent/hooks/__init__.py +32 -0
- bareagent/hooks/config.py +118 -0
- bareagent/hooks/engine.py +197 -0
- bareagent/hooks/errors.py +14 -0
- bareagent/hooks/events.py +22 -0
- bareagent/lsp/__init__.py +63 -0
- bareagent/lsp/config.py +134 -0
- bareagent/lsp/coord.py +118 -0
- bareagent/lsp/diagnostics.py +240 -0
- bareagent/lsp/errors.py +24 -0
- bareagent/lsp/manager.py +866 -0
- bareagent/lsp/tools.py +629 -0
- bareagent/lsp/workspace_edit.py +305 -0
- bareagent/main.py +4205 -0
- bareagent/mcp/__init__.py +69 -0
- bareagent/mcp/_sse.py +69 -0
- bareagent/mcp/client.py +341 -0
- bareagent/mcp/config.py +169 -0
- bareagent/mcp/errors.py +32 -0
- bareagent/mcp/manager.py +318 -0
- bareagent/mcp/protocol.py +187 -0
- bareagent/mcp/registry.py +557 -0
- bareagent/mcp/transport/__init__.py +15 -0
- bareagent/mcp/transport/base.py +149 -0
- bareagent/mcp/transport/http_legacy.py +192 -0
- bareagent/mcp/transport/http_streamable.py +217 -0
- bareagent/mcp/transport/stdio.py +202 -0
- bareagent/memory/__init__.py +1 -0
- bareagent/memory/compact.py +203 -0
- bareagent/memory/conversation_io.py +226 -0
- bareagent/memory/embedding.py +194 -0
- bareagent/memory/persistent.py +515 -0
- bareagent/memory/token_counter.py +67 -0
- bareagent/memory/token_tracker.py +262 -0
- bareagent/memory/transcript.py +100 -0
- bareagent/permission/__init__.py +1 -0
- bareagent/permission/guard.py +329 -0
- bareagent/permission/rules.py +19 -0
- bareagent/planning/__init__.py +19 -0
- bareagent/planning/agent_types.py +169 -0
- bareagent/planning/skill_gen.py +141 -0
- bareagent/planning/skill_store.py +173 -0
- bareagent/planning/skills.py +146 -0
- bareagent/planning/subagent.py +355 -0
- bareagent/planning/subagent_registry.py +77 -0
- bareagent/planning/tasks.py +348 -0
- bareagent/planning/todo.py +153 -0
- bareagent/planning/worktree.py +122 -0
- bareagent/provider/__init__.py +1 -0
- bareagent/provider/anthropic.py +348 -0
- bareagent/provider/base.py +136 -0
- bareagent/provider/factory.py +130 -0
- bareagent/provider/openai.py +881 -0
- bareagent/provider/presets.py +72 -0
- bareagent/provider/setup.py +356 -0
- bareagent/skills/.gitkeep +1 -0
- bareagent/skills/code-review/SKILL.md +68 -0
- bareagent/skills/git/SKILL.md +68 -0
- bareagent/skills/test/SKILL.md +70 -0
- bareagent/team/__init__.py +17 -0
- bareagent/team/autonomous.py +193 -0
- bareagent/team/mailbox.py +239 -0
- bareagent/team/manager.py +155 -0
- bareagent/team/protocols.py +129 -0
- bareagent/tracing/__init__.py +12 -0
- bareagent/tracing/_api.py +92 -0
- bareagent/tracing/_proxy.py +60 -0
- bareagent/tracing/composite.py +115 -0
- bareagent/tracing/json_file.py +115 -0
- bareagent/tracing/langfuse.py +139 -0
- bareagent/tracing/otel.py +107 -0
- bareagent/tracing/setup.py +85 -0
- bareagent/ui/__init__.py +24 -0
- bareagent/ui/console.py +167 -0
- bareagent/ui/prompt.py +78 -0
- bareagent/ui/protocol.py +24 -0
- bareagent/ui/stream.py +66 -0
- bareagent/ui/theme.py +240 -0
- bareagent_cli-0.1.0.dist-info/METADATA +331 -0
- bareagent_cli-0.1.0.dist-info/RECORD +121 -0
- bareagent_cli-0.1.0.dist-info/WHEEL +4 -0
- bareagent_cli-0.1.0.dist-info/entry_points.txt +2 -0
- bareagent_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
# Fallback price table for the project's default Claude models.
#
# Values are USD per million tokens as ``(input, output)``. Keys are matched by
# prefix (``str.startswith``), so a dated model id such as
# "claude-opus-4-8-20251101" resolves to its family entry.
#
# NOTE: these are reference values as of 2026-06 and MAY CHANGE. The user's
# [cost.prices] section in config.toml / config.local.toml overrides and
# extends this table and is the authoritative source.
DEFAULT_PRICES: dict[str, tuple[float, float]] = dict(
    [
        ("claude-opus-4", (15.0, 75.0)),
        ("claude-sonnet-4", (3.0, 15.0)),
        ("claude-haiku-4", (1.0, 5.0)),
    ]
)

# Prompt-cache multipliers, applied to a model's base *input* price and stored
# as ``(read_multiplier, write_multiplier)``. Keys are model-family prefixes,
# resolved by longest-prefix match exactly like DEFAULT_PRICES.
#   - Anthropic: reads 0.1x, writes 1.25x (the 5m TTL rate; the 1h TTL's 2x is
#     approximated as 1.25x — see PRD Out of Scope, estimate-only).
#   - OpenAI: cached input ~0.5x, no separate write premium.
#   - DeepSeek: cache hits ~0.1x, no separate write premium.
DEFAULT_CACHE_MULTIPLIERS: dict[str, tuple[float, float]] = {
    "claude": (0.1, 1.25),
    # All OpenAI families share the same multipliers.
    **{family: (0.5, 0.0) for family in ("gpt", "o1", "o3", "o4")},
    "deepseek": (0.1, 0.0),
}

# Used when no family prefix matches: an Anthropic-like default. Per the
# original notes, cache tokens are only populated for the providers listed
# above, so this is a conservative estimate rather than a load-bearing value.
_FALLBACK_CACHE_MULTIPLIERS: tuple[float, float] = (0.1, 1.25)

# Divisor converting the per-*million*-token prices above to per-token prices.
_PER_MILLION = 10**6
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def resolve_cache_multipliers(model: str) -> tuple[float, float]:
    """Return the ``(read_mult, write_mult)`` cache multipliers for *model*.

    The model id is matched against the keys of
    :data:`DEFAULT_CACHE_MULTIPLIERS` by longest prefix. When no family
    prefix matches, the Anthropic-like fallback pair is returned instead.
    """
    family = _longest_prefix_match(model, DEFAULT_CACHE_MULTIPLIERS.keys())
    if family is None:
        return _FALLBACK_CACHE_MULTIPLIERS
    return DEFAULT_CACHE_MULTIPLIERS[family]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def resolve_price(
    model: str,
    prices: dict[str, dict[str, float]] | None,
) -> tuple[float, float] | None:
    """Look up the (input, output) price per million tokens for *model*.

    Sources are consulted in this order:

    1. The user-configured ``prices`` mapping: an exact key match first, then
       the longest key that is a prefix of *model*. An entry that cannot be
       coerced to a price pair is ignored.
    2. The built-in :data:`DEFAULT_PRICES`, by longest prefix.

    ``None`` means no price is known; callers are expected to show token
    counts only rather than invent a cost.
    """
    if prices:
        exact_entry = prices.get(model)
        user_price = None if exact_entry is None else _coerce_price_entry(exact_entry)
        if user_price is None:
            # No usable exact entry: fall back to the longest configured prefix.
            matched_key = _longest_prefix_match(model, prices.keys())
            if matched_key is not None:
                user_price = _coerce_price_entry(prices[matched_key])
        if user_price is not None:
            return user_price

    family = _longest_prefix_match(model, DEFAULT_PRICES.keys())
    return None if family is None else DEFAULT_PRICES[family]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _longest_prefix_match(model: str, keys: Any) -> str | None:
|
|
88
|
+
"""Return the longest key in *keys* that is a prefix of *model*."""
|
|
89
|
+
best: str | None = None
|
|
90
|
+
for key in keys:
|
|
91
|
+
if model.startswith(key) and (best is None or len(key) > len(best)):
|
|
92
|
+
best = key
|
|
93
|
+
return best
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _coerce_price_entry(entry: dict[str, float]) -> tuple[float, float] | None:
|
|
97
|
+
"""Coerce a ``{input, output}`` config dict into an (input, output) tuple."""
|
|
98
|
+
if not isinstance(entry, dict):
|
|
99
|
+
return None
|
|
100
|
+
try:
|
|
101
|
+
return float(entry["input"]), float(entry["output"])
|
|
102
|
+
except (KeyError, TypeError, ValueError):
|
|
103
|
+
return None
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@dataclass(slots=True)
|
|
107
|
+
class _ModelUsage:
|
|
108
|
+
input_tokens: int = 0
|
|
109
|
+
output_tokens: int = 0
|
|
110
|
+
cache_read_tokens: int = 0
|
|
111
|
+
cache_write_tokens: int = 0
|
|
112
|
+
call_count: int = 0
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
@dataclass(slots=True)
|
|
116
|
+
class TokenTracker:
|
|
117
|
+
"""Process-level accumulator for LLM token usage during a session.
|
|
118
|
+
|
|
119
|
+
Records ``input_tokens`` / ``output_tokens`` from each :class:`LLMResponse`
|
|
120
|
+
plus a per-model breakdown. Pure logic with no I/O so it is unit-testable in
|
|
121
|
+
isolation. Reset on session boundaries (``/new`` / ``/clear`` / ``/resume``)
|
|
122
|
+
but not on in-session compaction (``/compact``).
|
|
123
|
+
"""
|
|
124
|
+
|
|
125
|
+
total_input: int = 0
|
|
126
|
+
total_output: int = 0
|
|
127
|
+
total_cache_read: int = 0
|
|
128
|
+
total_cache_write: int = 0
|
|
129
|
+
call_count: int = 0
|
|
130
|
+
per_model: dict[str, _ModelUsage] = field(default_factory=dict)
|
|
131
|
+
|
|
132
|
+
@property
|
|
133
|
+
def total_tokens(self) -> int:
|
|
134
|
+
return self.total_input + self.total_output + self.total_cache_read + self.total_cache_write
|
|
135
|
+
|
|
136
|
+
def record(self, response: Any, model: str) -> None:
|
|
137
|
+
"""Accumulate one LLM response's token usage under *model*.
|
|
138
|
+
|
|
139
|
+
Reads only the normalized usage fields off the response so it never
|
|
140
|
+
couples to a specific provider's wire shape. ``input_tokens`` is the
|
|
141
|
+
full-price remainder; cache read/write are additive and non-overlapping
|
|
142
|
+
(see ``LLMResponse``), so summing all four gives the true prompt size.
|
|
143
|
+
"""
|
|
144
|
+
input_tokens = int(getattr(response, "input_tokens", 0) or 0)
|
|
145
|
+
output_tokens = int(getattr(response, "output_tokens", 0) or 0)
|
|
146
|
+
cache_read = int(getattr(response, "cache_read_input_tokens", 0) or 0)
|
|
147
|
+
cache_write = int(getattr(response, "cache_creation_input_tokens", 0) or 0)
|
|
148
|
+
|
|
149
|
+
self.total_input += input_tokens
|
|
150
|
+
self.total_output += output_tokens
|
|
151
|
+
self.total_cache_read += cache_read
|
|
152
|
+
self.total_cache_write += cache_write
|
|
153
|
+
self.call_count += 1
|
|
154
|
+
|
|
155
|
+
usage = self.per_model.get(model)
|
|
156
|
+
if usage is None:
|
|
157
|
+
usage = _ModelUsage()
|
|
158
|
+
self.per_model[model] = usage
|
|
159
|
+
usage.input_tokens += input_tokens
|
|
160
|
+
usage.output_tokens += output_tokens
|
|
161
|
+
usage.cache_read_tokens += cache_read
|
|
162
|
+
usage.cache_write_tokens += cache_write
|
|
163
|
+
usage.call_count += 1
|
|
164
|
+
|
|
165
|
+
def reset(self) -> None:
|
|
166
|
+
"""Clear all accumulated usage (session boundary)."""
|
|
167
|
+
self.total_input = 0
|
|
168
|
+
self.total_output = 0
|
|
169
|
+
self.total_cache_read = 0
|
|
170
|
+
self.total_cache_write = 0
|
|
171
|
+
self.call_count = 0
|
|
172
|
+
self.per_model.clear()
|
|
173
|
+
|
|
174
|
+
def estimate_cost(
|
|
175
|
+
self,
|
|
176
|
+
prices: dict[str, dict[str, float]] | None,
|
|
177
|
+
) -> float | None:
|
|
178
|
+
"""Estimate total cost in USD across all priced models.
|
|
179
|
+
|
|
180
|
+
Models without a known price are skipped (their tokens still count, but
|
|
181
|
+
contribute no dollars). Returns ``None`` only when *no* recorded model
|
|
182
|
+
has a price, so the caller can suppress the ``$`` line entirely rather
|
|
183
|
+
than print ``$0.00``.
|
|
184
|
+
"""
|
|
185
|
+
total = 0.0
|
|
186
|
+
any_priced = False
|
|
187
|
+
for model, usage in self.per_model.items():
|
|
188
|
+
price = resolve_price(model, prices)
|
|
189
|
+
if price is None:
|
|
190
|
+
continue
|
|
191
|
+
any_priced = True
|
|
192
|
+
input_price, output_price = price
|
|
193
|
+
read_mult, write_mult = resolve_cache_multipliers(model)
|
|
194
|
+
total += usage.input_tokens / _PER_MILLION * input_price
|
|
195
|
+
total += usage.output_tokens / _PER_MILLION * output_price
|
|
196
|
+
total += usage.cache_read_tokens / _PER_MILLION * input_price * read_mult
|
|
197
|
+
total += usage.cache_write_tokens / _PER_MILLION * input_price * write_mult
|
|
198
|
+
return total if any_priced else None
|
|
199
|
+
|
|
200
|
+
def summary(self, prices: dict[str, dict[str, float]] | None) -> str:
|
|
201
|
+
"""Render a human-readable usage summary for the ``/cost`` command.
|
|
202
|
+
|
|
203
|
+
Always shows token counts (total input/output/total + call_count +
|
|
204
|
+
per-model breakdown). Priced models show their ``$`` estimate inline;
|
|
205
|
+
unpriced models are tagged ``(no price)``. A total cost line is added
|
|
206
|
+
only when at least one model is priced.
|
|
207
|
+
"""
|
|
208
|
+
lines = [
|
|
209
|
+
"Token usage (this session):",
|
|
210
|
+
f" Input: {self.total_input:,} tokens",
|
|
211
|
+
f" Output: {self.total_output:,} tokens",
|
|
212
|
+
]
|
|
213
|
+
# Only surface the cache line when caching actually happened, so
|
|
214
|
+
# non-cached sessions keep the original compact output.
|
|
215
|
+
if self.total_cache_read or self.total_cache_write:
|
|
216
|
+
lines.append(
|
|
217
|
+
f" Cache: {self.total_cache_read:,} read / "
|
|
218
|
+
f"{self.total_cache_write:,} write tokens"
|
|
219
|
+
)
|
|
220
|
+
lines.extend(
|
|
221
|
+
[
|
|
222
|
+
f" Total: {self.total_tokens:,} tokens",
|
|
223
|
+
f" Calls: {self.call_count}",
|
|
224
|
+
]
|
|
225
|
+
)
|
|
226
|
+
|
|
227
|
+
if self.per_model:
|
|
228
|
+
lines.append(" By model:")
|
|
229
|
+
for model in sorted(self.per_model):
|
|
230
|
+
usage = self.per_model[model]
|
|
231
|
+
price = resolve_price(model, prices)
|
|
232
|
+
if price is None:
|
|
233
|
+
cost_label = " (no price)"
|
|
234
|
+
else:
|
|
235
|
+
input_price, output_price = price
|
|
236
|
+
read_mult, write_mult = resolve_cache_multipliers(model)
|
|
237
|
+
model_cost = (
|
|
238
|
+
usage.input_tokens / _PER_MILLION * input_price
|
|
239
|
+
+ usage.output_tokens / _PER_MILLION * output_price
|
|
240
|
+
+ usage.cache_read_tokens / _PER_MILLION * input_price * read_mult
|
|
241
|
+
+ usage.cache_write_tokens / _PER_MILLION * input_price * write_mult
|
|
242
|
+
)
|
|
243
|
+
cost_label = f" — ${model_cost:.4f}"
|
|
244
|
+
cache_label = ""
|
|
245
|
+
if usage.cache_read_tokens or usage.cache_write_tokens:
|
|
246
|
+
cache_label = (
|
|
247
|
+
f", {usage.cache_read_tokens:,} cache-read / "
|
|
248
|
+
f"{usage.cache_write_tokens:,} cache-write"
|
|
249
|
+
)
|
|
250
|
+
lines.append(
|
|
251
|
+
f" {model}: "
|
|
252
|
+
f"{usage.input_tokens:,} in / {usage.output_tokens:,} out"
|
|
253
|
+
f"{cache_label} "
|
|
254
|
+
f"({usage.call_count} calls){cost_label}"
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
total_cost = self.estimate_cost(prices)
|
|
258
|
+
if total_cost is not None:
|
|
259
|
+
lines.append(f" Estimated cost: ${total_cost:.4f}")
|
|
260
|
+
lines.append(" (prices are estimates; override via [cost.prices] in config)")
|
|
261
|
+
|
|
262
|
+
return "\n".join(lines)
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
_SAVE_TIMESTAMP_FORMAT = "%Y-%m-%dT%H-%M-%S-%f"
|
|
10
|
+
_TIMESTAMP_FORMATS = (
|
|
11
|
+
_SAVE_TIMESTAMP_FORMAT,
|
|
12
|
+
"%Y-%m-%dT%H-%M-%S",
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(slots=True)
|
|
17
|
+
class _TranscriptEntry:
|
|
18
|
+
session_id: str
|
|
19
|
+
timestamp: datetime
|
|
20
|
+
path: Path
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TranscriptManager:
    """Store and retrieve conversation transcripts as JSONL files.

    Each save writes a new ``<session_id>_<timestamp>.jsonl`` file into
    ``transcript_dir``; loading a session reads its most recent file.
    """

    def __init__(self, transcript_dir: str | Path = ".transcripts") -> None:
        """Remember *transcript_dir* and create it (with parents) if missing."""
        directory = Path(transcript_dir)
        directory.mkdir(parents=True, exist_ok=True)
        self.transcript_dir = directory

    def save(self, messages: list[dict[str, Any]], session_id: str) -> Path:
        """Write *messages* one JSON object per line and return the new path."""
        stamp = datetime.now().strftime(_SAVE_TIMESTAMP_FORMAT)
        target = self.transcript_dir / f"{session_id}_{stamp}.jsonl"
        with target.open("w", encoding="utf-8") as handle:
            handle.writelines(
                json.dumps(message, ensure_ascii=False) + "\n" for message in messages
            )
        return target

    def load(self, session_id: str) -> list[dict[str, Any]]:
        """Return the messages of the newest transcript saved for *session_id*.

        Blank lines are skipped. Raises ``FileNotFoundError`` when the session
        has no transcript.
        """
        source = self._get_session_entry(session_id).path
        records: list[dict[str, Any]] = []
        with source.open("r", encoding="utf-8") as handle:
            for raw_line in handle:
                if raw_line.strip():
                    records.append(json.loads(raw_line))
        return records

    def list_sessions(self) -> list[str]:
        """Return known session ids, most recently saved first."""
        newest: dict[str, datetime] = {}
        for entry in self._iter_entries():
            seen = newest.get(entry.session_id)
            if seen is None or entry.timestamp > seen:
                newest[entry.session_id] = entry.timestamp
        return sorted(newest, key=newest.__getitem__, reverse=True)

    def get_latest_session(self) -> str | None:
        """Return the id of the most recently saved session, or ``None``."""
        latest = max(
            self._iter_entries(),
            key=lambda candidate: candidate.timestamp,
            default=None,
        )
        return None if latest is None else latest.session_id

    def resume(self, session_id: str | None = None) -> list[dict[str, Any]]:
        """Load *session_id*, or the latest session when it is not given.

        Raises ``FileNotFoundError`` when nothing has been saved yet.
        """
        chosen = session_id if session_id else self.get_latest_session()
        if chosen is not None:
            return self.load(chosen)
        raise FileNotFoundError("No saved transcripts found.")

    def _get_session_entry(self, session_id: str) -> _TranscriptEntry:
        """Return the newest entry for *session_id* or raise ``FileNotFoundError``."""
        latest = max(
            (
                candidate
                for candidate in self._iter_entries()
                if candidate.session_id == session_id
            ),
            key=lambda candidate: candidate.timestamp,
            default=None,
        )
        if latest is None:
            raise FileNotFoundError(f"No transcript found for session: {session_id}")
        return latest

    def _iter_entries(self) -> list[_TranscriptEntry]:
        """Parse every ``*.jsonl`` file in the directory, skipping unparseable names."""
        parsed = (self._parse_entry(path) for path in self.transcript_dir.glob("*.jsonl"))
        return [entry for entry in parsed if entry is not None]

    def _parse_entry(self, path: Path) -> _TranscriptEntry | None:
        """Decode ``<session_id>_<timestamp>`` from *path*'s stem.

        Returns ``None`` when the stem has no underscore or the part after the
        last underscore matches none of the known timestamp formats.
        """
        session_id, separator, raw_timestamp = path.stem.rpartition("_")
        if not separator:
            return None
        for fmt in _TIMESTAMP_FORMATS:
            try:
                parsed = datetime.strptime(raw_timestamp, fmt)
            except ValueError:
                continue
            return _TranscriptEntry(session_id=session_id, timestamp=parsed, path=path)
        return None
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Permission control modules for BareAgent."""
|
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
import sys
|
|
6
|
+
from collections.abc import Callable
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from bareagent.planning.agent_types import AgentType
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PermissionMode(Enum):
    """Operating modes a ``PermissionGuard`` can run in.

    The string values are the serialized names of the modes.
    """

    # Standard mode.
    DEFAULT = "default"
    # Automatic-approval mode.
    AUTO = "auto"
    # Planning mode.
    PLAN = "plan"
    # Confirmation is skipped entirely.
    BYPASS = "bypass"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
_SHELLS = "bash|sh|zsh|dash|ksh|fish"
|
|
22
|
+
|
|
23
|
+
_MCP_TOOL_PREFIX = "mcp__"
|
|
24
|
+
# Preview limits for MCP ask prompts. MCP args are JSON, not shell text, and
|
|
25
|
+
# servers can produce arbitrarily large strings (file blobs, long URLs). Cap
|
|
26
|
+
# top-level string values so a single field can't flood the terminal.
|
|
27
|
+
_MCP_PREVIEW_FIELD_LIMIT = 256
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _is_mcp_tool(tool_name: str) -> bool:
    """Tell whether ``tool_name`` lives in the ``mcp__<server>__<tool>`` namespace.

    Only the leading ``mcp__`` prefix is checked.
    """
    prefix_length = len(_MCP_TOOL_PREFIX)
    return tool_name[:prefix_length] == _MCP_TOOL_PREFIX
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class PermissionGuard:
    """Decide whether a tool call needs user confirmation, and obtain it.

    ``requires_confirm`` is the policy: it combines the permission mode, the
    user's ``allow_rules`` / ``deny_rules`` (prefix rules, see
    ``_parse_prefix_rule``) and the built-in tool and shell-pattern lists.
    ``ask_user`` is the prompt that resolves a required confirmation.
    """

    # Tools that never need confirmation (unless a deny rule matches).
    SAFE_TOOLS = {
        "read_file",
        "glob",
        "grep",
        "todo_read",
        "todo_write",
        "load_skill",
        "task_list",
        "task_get",
        "team_list",
        "web_fetch",
        "web_search",
        # Memory is sandboxed to its own directory (never user code) and is
        # agent bookkeeping; prompting on every recall/save would be noise.
        # Read-only isolation for sub-agents is handled at the AgentType layer
        # (memory_writable), not here.
        "memory",
        # skill_create writes only to the generated-skills pending sandbox and
        # is exposed only inside the isolated reflection call (never the main
        # tool set / sub-agents), so prompting would be noise.
        "skill_create",
        # goal_verdict only records the evaluator's judgement into an in-memory
        # sink (no workspace side effects) and is exposed only inside the
        # isolated goal-evaluator call, so prompting would be noise.
        "goal_verdict",
        # exit_plan_mode is the *only* way out of PLAN mode, so it MUST be
        # allowed while in PLAN. Its own action is the approval prompt, so a
        # separate permission confirm would be redundant. It is a
        # main-loop-only tool (never in the global set / sub-agents).
        "exit_plan_mode",
    }
    # Shell commands that are approved without a prompt. The patterns anchor
    # only the *first* word, so they are applied to simple commands only (see
    # _COMPOUND_COMMAND_PATTERN).
    AUTO_SAFE_PATTERNS = [
        re.compile(r"^(ls|cat|head|tail|wc|echo|pwd|date|which|type)\b"),
        re.compile(r"^git\s+(status|log|diff|branch|show)\b"),
        re.compile(r"^(pytest|python\s+-m\s+pytest|ruff|mypy)\b"),
        re.compile(r"^npm\s+(test|run\s+lint|run\s+test)\b"),
    ]
    # Shell control operators, redirections and command substitution. A command
    # containing any of these can run more than its first word suggests
    # (``ls; curl ... | python``), so it never qualifies as auto-safe.
    _COMPOUND_COMMAND_PATTERN = re.compile(r"[;&|<>`\n]|\$\(")
    # Shell-text heuristics that always force a confirmation for bash.
    DANGEROUS_PATTERNS = [
        # Recursive rm: any short-flag cluster containing r/R (-r, -rf, -fr,
        # -Rv ...) or --recursive, anywhere among rm's arguments.
        re.compile(r"(^|\s)rm\b[^;&|\n]*\s(-[A-Za-z]*[rR][A-Za-z]*\b|--recursive\b)"),
        # Forced push: --force (incl. --force-with-lease) or a short-flag
        # cluster containing f, anywhere among git push's arguments.
        re.compile(r"git\s+push\b[^;&|\n]*\s(--force\b|-[A-Za-z]*f[A-Za-z]*\b)"),
        re.compile(r"git\s+reset\s+--hard\b"),
        re.compile(r"DROP\s+TABLE\b", re.IGNORECASE),
        re.compile(r"DELETE\s+FROM\b", re.IGNORECASE),
        # shell wrapper bypass
        re.compile(rf"(^|\s)({_SHELLS})\s+-c\b"),
        # absolute-path rm bypass
        re.compile(r"(^|\s)/(?:usr/)?bin/rm\b"),
        # env prefix bypass
        re.compile(r"(^|\s)env\s+"),
        # pipe-to-shell execution
        re.compile(rf"curl\b.*\|\s*({_SHELLS})\b"),
        re.compile(rf"wget\b.*\|\s*({_SHELLS})\b"),
        # destructive system commands
        re.compile(r"(^|\s)chmod\s+777\b"),
        re.compile(r"(^|\s)mkfs\b"),
        re.compile(r"(^|\s)dd\s+if="),
        re.compile(r"find\b.*-delete\b"),
    ]

    def __init__(
        self,
        mode: PermissionMode = PermissionMode.DEFAULT,
        *,
        fail_closed: bool = False,
        ask_user_fn: Callable[[Any], bool] | None = None,
    ) -> None:
        """Create a guard.

        Args:
            mode: Permission mode the guard starts in.
            fail_closed: When true, ``ask_user`` denies without prompting.
            ask_user_fn: Optional callback used instead of the built-in
                terminal prompt; receives the tool call, returns the decision.
        """
        self.mode = mode
        self.allow_rules: list[str] = []
        self.deny_rules: list[str] = []
        self.fail_closed = fail_closed
        self._ask_user_fn = ask_user_fn

    def requires_confirm(self, tool_name: str, tool_input: dict[str, Any]) -> bool:
        """Return True when the call must be confirmed before it runs.

        BYPASS never confirms. Otherwise the decision depends on the tool
        class (MCP, bash, everything else), the mode and the prefix rules;
        deny rules always take precedence over allow rules and built-in
        safe lists.
        """
        if self.mode == PermissionMode.BYPASS:
            return False
        normalized_tool = tool_name.strip().lower()
        rule_subject = permission_rule_subject(normalized_tool, tool_input)

        # MCP tools carry JSON args (not shell text), so DANGEROUS_PATTERNS do
        # not apply; only the mode and the generic prefix rules are used.
        if _is_mcp_tool(normalized_tool):
            return self._mcp_requires_confirm(normalized_tool, rule_subject)

        if self.mode == PermissionMode.PLAN:
            if normalized_tool not in self.SAFE_TOOLS:
                return True
            # Deny rules stay in force for SAFE tools too, so PLAN is never
            # more permissive than the other modes.
            return self._is_denied(normalized_tool, rule_subject)

        if normalized_tool == "bash":
            return self._bash_requires_confirm(rule_subject or "")

        if self._is_denied(normalized_tool, rule_subject):
            return True
        if normalized_tool in self.SAFE_TOOLS:
            return False
        if normalized_tool in {"edit_file", "task_create", "task_update"}:
            return False
        if self._is_allowed(normalized_tool, rule_subject):
            return False
        if normalized_tool in {"write_file", "semantic_rename"}:
            # Write tools: confirm in DEFAULT, auto-approve in AUTO. PLAN and
            # BYPASS were both handled above.
            return self.mode == PermissionMode.DEFAULT
        return True

    def _is_denied(self, tool: str, subject: str | None) -> bool:
        """True when *subject* is non-empty and matches a deny rule for *tool*."""
        return bool(subject) and self._match_rules(self.deny_rules, tool, subject)

    def _is_allowed(self, tool: str, subject: str | None) -> bool:
        """True when *subject* is non-empty and matches an allow rule for *tool*."""
        return bool(subject) and self._match_rules(self.allow_rules, tool, subject)

    def _mcp_requires_confirm(self, tool: str, subject: str | None) -> bool:
        """Confirmation policy for ``mcp__*`` tools."""
        # PLAN rejects every MCP tool by policy — MCP servers have unknown
        # side effects and are not in SAFE_TOOLS. This runs before the allow
        # rules so an allowlist cannot punch holes through PLAN.
        if self.mode == PermissionMode.PLAN:
            return True
        if self._is_denied(tool, subject):
            return True
        if self._is_allowed(tool, subject):
            return False
        # AUTO approves; DEFAULT always asks for MCP tools.
        return self.mode != PermissionMode.AUTO

    def _bash_requires_confirm(self, cmd: str) -> bool:
        """Confirmation policy for a bash command (DEFAULT and AUTO modes)."""
        if self._match_rules(self.deny_rules, "bash", cmd):
            return True
        if any(pattern.search(cmd) for pattern in self.DANGEROUS_PATTERNS):
            return True
        if self._match_rules(self.allow_rules, "bash", cmd):
            return False
        # The auto-safe patterns only look at the leading command, so they are
        # trusted for simple commands only; anything chained, piped,
        # redirected or substituted falls through to the mode default.
        is_simple = self._COMPOUND_COMMAND_PATTERN.search(cmd) is None
        if is_simple and any(pattern.search(cmd) for pattern in self.AUTO_SAFE_PATTERNS):
            return False
        # DEFAULT asks; AUTO allows whatever matched no dangerous pattern.
        return self.mode == PermissionMode.DEFAULT

    def is_dangerous(self, tool_name: str, tool_input: dict[str, Any]) -> bool:
        """Return True if a bash command matches a known dangerous shell pattern.

        DANGEROUS_PATTERNS encode shell-text heuristics (``rm -rf``,
        ``git push --force``, ``DROP TABLE``...). They are intentionally
        skipped for MCP tools, whose ``tool_input`` is JSON rather than a
        shell command — applying shell regexes against JSON would produce
        false positives without catching anything real. Every tool other
        than ``bash`` is reported as not dangerous.
        """
        normalized_tool = tool_name.strip().lower()
        if _is_mcp_tool(normalized_tool):
            return False
        if normalized_tool == "bash":
            cmd = str(tool_input.get("command", ""))
            return any(pattern.search(cmd) for pattern in self.DANGEROUS_PATTERNS)
        return False

    def format_preview(self, tool_name: str, tool_input: dict[str, Any]) -> str:
        """Return a human-readable JSON preview of ``tool_input`` for ask prompts.

        Top-level string values longer than ``_MCP_PREVIEW_FIELD_LIMIT`` are
        truncated with a ``... [truncated, N chars]`` suffix so a single huge
        argument (file blob, long URL) cannot drown the terminal. Nested
        structures are not recursively truncated. Values JSON cannot encode
        are rendered with ``str``. ``tool_name`` is currently unused.
        """
        if not isinstance(tool_input, dict) or not tool_input:
            return json.dumps(tool_input, ensure_ascii=False, indent=2, default=str)
        prepared: dict[str, Any] = {}
        for key, value in tool_input.items():
            if isinstance(value, str) and len(value) > _MCP_PREVIEW_FIELD_LIMIT:
                prepared[key] = (
                    value[:_MCP_PREVIEW_FIELD_LIMIT] + f"... [truncated, {len(value)} chars]"
                )
            else:
                prepared[key] = value
        return json.dumps(prepared, ensure_ascii=False, indent=2, default=str)

    def ask_user(self, call: Any) -> bool:
        """Resolve a required confirmation for *call*; True means allowed.

        Denies without prompting when the guard is fail-closed, in PLAN mode,
        or when stdin is not a TTY and no ``ask_user_fn`` was supplied.
        Otherwise delegates to ``ask_user_fn`` if set, else prompts on the
        terminal, where only an explicit ``y`` approves.
        """
        if self.fail_closed:
            return False
        if self.mode == PermissionMode.PLAN:
            print(f"Plan mode: {call.name} blocked (read-only)")
            return False
        if self._ask_user_fn is not None:
            return self._ask_user_fn(call)
        if not sys.stdin.isatty():
            print(f"Non-interactive environment: {call.name} denied")
            return False
        # default=str keeps the prompt from crashing on a non-JSON argument.
        print(f"{call.name}: {json.dumps(call.input, ensure_ascii=False, default=str)[:200]}")
        try:
            return input("Allow? [y/N] ").strip().lower() == "y"
        except EOFError:
            return False

    def _match_rules(self, rules: list[str], tool_name: str, cmd: str) -> bool:
        """True when any rule for *tool_name* has a prefix that *cmd* starts with.

        Rules that fail to parse or target another tool are skipped.
        """
        normalized_tool = tool_name.strip().lower()
        for rule in rules:
            parsed = _parse_prefix_rule(rule)
            if parsed is None:
                continue
            rule_tool, prefix = parsed
            if rule_tool != normalized_tool:
                continue
            if cmd.strip().startswith(prefix):
                return True
        return False

    def clone(
        self, *, mode: PermissionMode | None = None, fail_closed: bool | None = None
    ) -> PermissionGuard:
        """Create a copy of this guard with optional overrides.

        The rule lists are copied, so later changes to the clone's rules do
        not affect this guard; the ``ask_user_fn`` callback is shared.
        """
        child = PermissionGuard(
            mode=mode if mode is not None else self.mode,
            fail_closed=fail_closed if fail_closed is not None else self.fail_closed,
            ask_user_fn=self._ask_user_fn,
        )
        child.allow_rules = list(self.allow_rules)
        child.deny_rules = list(self.deny_rules)
        return child

    def for_subagent(
        self,
        agent_type: AgentType,
        *,
        background: bool = False,
    ) -> PermissionGuard:
        """Clone the guard for child-agent execution.

        The agent type's ``permission_mode`` wins over this guard's mode when
        set. The clone is fail-closed if this guard already is, if the child
        runs in the background, or if the resolved mode is PLAN.
        """
        resolved_mode = (
            agent_type.permission_mode if agent_type.permission_mode is not None else self.mode
        )
        return self.clone(
            mode=resolved_mode,
            fail_closed=self.fail_closed or background or resolved_mode == PermissionMode.PLAN,
        )
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _parse_prefix_rule(rule: str) -> tuple[str, str] | None:
|
|
277
|
+
match = re.fullmatch(
|
|
278
|
+
r"\s*([A-Za-z_][A-Za-z0-9_]*)\((prefix|prefix_json):([\s\S]+)\)\s*",
|
|
279
|
+
rule,
|
|
280
|
+
)
|
|
281
|
+
if match is None:
|
|
282
|
+
return None
|
|
283
|
+
tool_name = match.group(1).strip().lower()
|
|
284
|
+
rule_kind = match.group(2)
|
|
285
|
+
raw_prefix = match.group(3)
|
|
286
|
+
if rule_kind == "prefix_json":
|
|
287
|
+
try:
|
|
288
|
+
parsed_prefix = json.loads(raw_prefix)
|
|
289
|
+
except json.JSONDecodeError:
|
|
290
|
+
return None
|
|
291
|
+
if not isinstance(parsed_prefix, str):
|
|
292
|
+
return None
|
|
293
|
+
return tool_name, parsed_prefix
|
|
294
|
+
prefix = raw_prefix.rstrip("*").strip()
|
|
295
|
+
return tool_name, prefix
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def permission_rule_subject(tool_name: str, tool_input: dict[str, Any]) -> str | None:
    """Derive the string that prefix rules are matched against for a tool call.

    Resolution order:

    1. ``bash``: the stripped ``command`` (``None`` when empty).
    2. The first non-blank string among ``file_path``, ``path``, ``name``,
       ``to_agent``, ``task_id`` and ``skill_name``.
    3. A non-blank ``task`` value, converted with ``str``.
    4. Otherwise the whole input serialized as key-sorted JSON, or ``None``
       when the input is empty.
    """
    if tool_name.strip().lower() == "bash":
        return str(tool_input.get("command", "")).strip() or None

    identifying_keys = ("file_path", "path", "name", "to_agent", "task_id", "skill_name")
    for candidate in map(tool_input.get, identifying_keys):
        if isinstance(candidate, str) and candidate.strip():
            return candidate.strip()

    if "task" in tool_input:
        task_text = str(tool_input.get("task", "")).strip()
        if task_text:
            return task_text

    if not tool_input:
        return None

    try:
        # sort_keys gives a stable subject regardless of argument order.
        fallback = json.dumps(
            tool_input,
            ensure_ascii=False,
            sort_keys=True,
            default=str,
        )
    except (TypeError, ValueError):
        fallback = str(tool_input).strip()
    return fallback or None
|