editbuffer 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. {editbuffer-0.2.2/src/editbuffer.egg-info → editbuffer-0.2.3}/PKG-INFO +7 -8
  2. {editbuffer-0.2.2 → editbuffer-0.2.3}/README.md +6 -7
  3. {editbuffer-0.2.2 → editbuffer-0.2.3}/pyproject.toml +1 -1
  4. editbuffer-0.2.3/src/editbuffer/history.py +258 -0
  5. {editbuffer-0.2.2 → editbuffer-0.2.3}/src/editbuffer/mcp_server.py +127 -40
  6. {editbuffer-0.2.2 → editbuffer-0.2.3/src/editbuffer.egg-info}/PKG-INFO +7 -8
  7. {editbuffer-0.2.2 → editbuffer-0.2.3}/src/editbuffer.egg-info/SOURCES.txt +5 -1
  8. editbuffer-0.2.3/tests/test_collect_harbor_results.py +40 -0
  9. editbuffer-0.2.3/tests/test_mcp_server.py +154 -0
  10. editbuffer-0.2.3/tests/test_mcp_stdio_eval.py +384 -0
  11. editbuffer-0.2.3/tests/test_metrics.py +34 -0
  12. editbuffer-0.2.3/tests/test_parse_agent_trajectories.py +123 -0
  13. editbuffer-0.2.3/tests/test_run_terminal_bench_ab.py +124 -0
  14. editbuffer-0.2.2/src/editbuffer/history.py +0 -33
  15. editbuffer-0.2.2/tests/test_mcp_server.py +0 -81
  16. editbuffer-0.2.2/tests/test_mcp_stdio_eval.py +0 -370
  17. {editbuffer-0.2.2 → editbuffer-0.2.3}/LICENSE +0 -0
  18. {editbuffer-0.2.2 → editbuffer-0.2.3}/setup.cfg +0 -0
  19. {editbuffer-0.2.2 → editbuffer-0.2.3}/src/editbuffer/__init__.py +0 -0
  20. {editbuffer-0.2.2 → editbuffer-0.2.3}/src/editbuffer/blocks.py +0 -0
  21. {editbuffer-0.2.2 → editbuffer-0.2.3}/src/editbuffer/buffer.py +0 -0
  22. {editbuffer-0.2.2 → editbuffer-0.2.3}/src/editbuffer/cli.py +0 -0
  23. {editbuffer-0.2.2 → editbuffer-0.2.3}/src/editbuffer/errors.py +0 -0
  24. {editbuffer-0.2.2 → editbuffer-0.2.3}/src/editbuffer/operations.py +0 -0
  25. {editbuffer-0.2.2 → editbuffer-0.2.3}/src/editbuffer/py.typed +0 -0
  26. {editbuffer-0.2.2 → editbuffer-0.2.3}/src/editbuffer/resolver.py +0 -0
  27. {editbuffer-0.2.2 → editbuffer-0.2.3}/src/editbuffer/selection.py +0 -0
  28. {editbuffer-0.2.2 → editbuffer-0.2.3}/src/editbuffer/validators.py +0 -0
  29. {editbuffer-0.2.2 → editbuffer-0.2.3}/src/editbuffer.egg-info/dependency_links.txt +0 -0
  30. {editbuffer-0.2.2 → editbuffer-0.2.3}/src/editbuffer.egg-info/entry_points.txt +0 -0
  31. {editbuffer-0.2.2 → editbuffer-0.2.3}/src/editbuffer.egg-info/requires.txt +0 -0
  32. {editbuffer-0.2.2 → editbuffer-0.2.3}/src/editbuffer.egg-info/top_level.txt +0 -0
  33. {editbuffer-0.2.2 → editbuffer-0.2.3}/tests/test_cli.py +0 -0
  34. {editbuffer-0.2.2 → editbuffer-0.2.3}/tests/test_editbuffer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: editbuffer
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Selection-based mutable output buffer for LLM tools
5
5
  Author: averagedigital
6
6
  License-Expression: MIT
@@ -219,7 +219,6 @@ Claude Desktop and generic MCP client examples are in
219
219
 
220
220
  The server exposes:
221
221
 
222
- - `buffer_create`
223
222
  - `buffer_append`
224
223
  - `buffer_list`
225
224
  - `buffer_view`
@@ -231,8 +230,8 @@ The server exposes:
231
230
  - `buffer_history`
232
231
  - `buffer_rollback`
233
232
  - `buffer_commit`
234
- - `command_history`
235
- - `command_select`
233
+ - `tool_history`
234
+ - `tool_select`
236
235
 
237
236
  Buffers are in-memory and live for the MCP server process. The MCP layer calls
238
237
  the same core API and does not implement separate edit semantics.
@@ -249,10 +248,10 @@ Use the first-class selection tools for normal agent use:
249
248
 
250
249
  `buffer_edit` remains available for raw JSON operations.
251
250
 
252
- `buffer_commit` remembers non-empty committed output as a reusable command.
253
- `command_history` returns the last 10 commands, newest first. `command_select`
254
- creates a new pending buffer from a previous command so the model can reuse it
255
- instead of regenerating it.
251
+ MCP calls are recorded in SQLite-backed history. `tool_history` returns recent
252
+ calls, newest first. `tool_select` creates a pending buffer from selectable
253
+ content in a previous call so the model can repair it instead of regenerating
254
+ it.
256
255
 
257
256
  ## Examples
258
257
 
@@ -201,7 +201,6 @@ Claude Desktop and generic MCP client examples are in
201
201
 
202
202
  The server exposes:
203
203
 
204
- - `buffer_create`
205
204
  - `buffer_append`
206
205
  - `buffer_list`
207
206
  - `buffer_view`
@@ -213,8 +212,8 @@ The server exposes:
213
212
  - `buffer_history`
214
213
  - `buffer_rollback`
215
214
  - `buffer_commit`
216
- - `command_history`
217
- - `command_select`
215
+ - `tool_history`
216
+ - `tool_select`
218
217
 
219
218
  Buffers are in-memory and live for the MCP server process. The MCP layer calls
220
219
  the same core API and does not implement separate edit semantics.
@@ -231,10 +230,10 @@ Use the first-class selection tools for normal agent use:
231
230
 
232
231
  `buffer_edit` remains available for raw JSON operations.
233
232
 
234
- `buffer_commit` remembers non-empty committed output as a reusable command.
235
- `command_history` returns the last 10 commands, newest first. `command_select`
236
- creates a new pending buffer from a previous command so the model can reuse it
237
- instead of regenerating it.
233
+ MCP calls are recorded in SQLite-backed history. `tool_history` returns recent
234
+ calls, newest first. `tool_select` creates a pending buffer from selectable
235
+ content in a previous call so the model can repair it instead of regenerating
236
+ it.
238
237
 
239
238
  ## Examples
240
239
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "editbuffer"
7
- version = "0.2.2"
7
+ version = "0.2.3"
8
8
  description = "Selection-based mutable output buffer for LLM tools"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -0,0 +1,258 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import sqlite3
6
+ from datetime import UTC, datetime, timedelta
7
+ from dataclasses import dataclass
8
+ from collections.abc import Iterator
9
+ from pathlib import Path
10
+ from typing import Any
11
+ from uuid import uuid4
12
+
13
+ from .operations import EditOperation
14
+
15
+
16
+ @dataclass(frozen=True, slots=True)
17
+ class EditRecord:
18
+ operation: EditOperation
19
+ start: int
20
+ end: int
21
+ before: str
22
+ after: str
23
+ version_before: int
24
+ version_after: int
25
+ confidence: float = 1.0
26
+
27
+
28
class EditHistory:
    """In-memory, list-backed collection of ``EditRecord`` entries.

    Records are kept in the order they were appended and can be read back
    by index, by iteration, or counted with ``len()``.
    """

    def __init__(self) -> None:
        """Start with an empty history."""
        self._records: list[EditRecord] = []

    def __iter__(self) -> Iterator[EditRecord]:
        """Iterate over the stored records in insertion order."""
        return iter(self._records)

    def __getitem__(self, index: int) -> EditRecord:
        """Return the record at ``index``, using normal list indexing rules."""
        return self._records[index]

    def __len__(self) -> int:
        """Return the number of stored records."""
        return len(self._records)

    def append(self, record: EditRecord) -> None:
        """Add ``record`` to the end of the history."""
        self._records.append(record)
43
+
44
+
45
# Lowercase fragments; a mapping key containing any of them (case-insensitive
# substring match, see ``_redact``) has its value replaced before storage.
SECRET_KEYS = (
    "api_key",
    "token",
    "secret",
    "password",
    "authorization",
)
46
+
47
+
48
+ class ToolHistoryStore:
49
+ def __init__(
50
+ self,
51
+ path: str | Path | None = None,
52
+ *,
53
+ retention_days: int | None = None,
54
+ default_limit: int | None = None,
55
+ ) -> None:
56
+ self.path = Path(path) if path is not None else _default_history_path()
57
+ self.retention_days = (
58
+ retention_days
59
+ if retention_days is not None
60
+ else int(os.environ.get("EDITBUFFER_HISTORY_RETENTION_DAYS", "7"))
61
+ )
62
+ self.default_limit = (
63
+ default_limit
64
+ if default_limit is not None
65
+ else int(os.environ.get("EDITBUFFER_HISTORY_LIMIT", "10"))
66
+ )
67
+ self.path.parent.mkdir(parents=True, exist_ok=True)
68
+ self._init_db()
69
+ self.cleanup()
70
+
71
+ def record_tool_call(
72
+ self,
73
+ tool_name: str,
74
+ arguments: dict[str, Any] | None = None,
75
+ *,
76
+ call_id: str | None = None,
77
+ result: Any = None,
78
+ status: str = "success",
79
+ error: str | None = None,
80
+ content: str | None = None,
81
+ command: str | None = None,
82
+ timestamp: datetime | None = None,
83
+ ) -> str:
84
+ self.cleanup()
85
+ identifier = call_id or f"call-{uuid4().hex}"
86
+ when = timestamp or datetime.now(UTC)
87
+ redacted_arguments = _redact(arguments or {})
88
+ redacted_result = _redact(result)
89
+ with self._connect() as db:
90
+ db.execute(
91
+ """
92
+ INSERT OR REPLACE INTO tool_calls (
93
+ call_id, timestamp, tool_name, arguments_json, result_json,
94
+ result_summary, status, error, content, command
95
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
96
+ """,
97
+ (
98
+ identifier,
99
+ when.isoformat(),
100
+ tool_name,
101
+ _json_dump(redacted_arguments),
102
+ _json_dump(redacted_result),
103
+ _summary(redacted_result),
104
+ status,
105
+ error,
106
+ content,
107
+ command,
108
+ ),
109
+ )
110
+ return identifier
111
+
112
+ def list_tool_calls(self, limit: int | None = None) -> list[dict[str, Any]]:
113
+ self.cleanup()
114
+ row_limit = self.default_limit if limit is None else limit
115
+ with self._connect() as db:
116
+ rows = db.execute(
117
+ """
118
+ SELECT call_id, timestamp, tool_name, arguments_json, result_json,
119
+ result_summary, status, error, content, command
120
+ FROM tool_calls
121
+ ORDER BY timestamp DESC, rowid DESC
122
+ LIMIT ?
123
+ """,
124
+ (row_limit,),
125
+ ).fetchall()
126
+ return [_row(row) for row in rows]
127
+
128
+ def get_tool_call(self, call_id: str) -> dict[str, Any]:
129
+ self.cleanup()
130
+ with self._connect() as db:
131
+ row = db.execute(
132
+ """
133
+ SELECT call_id, timestamp, tool_name, arguments_json, result_json,
134
+ result_summary, status, error, content, command
135
+ FROM tool_calls
136
+ WHERE call_id = ?
137
+ """,
138
+ (call_id,),
139
+ ).fetchone()
140
+ if row is None:
141
+ raise KeyError(f"unknown tool call: {call_id}")
142
+ return _row(row)
143
+
144
+ def command_history(self, limit: int | None = None) -> list[dict[str, str]]:
145
+ row_limit = self.default_limit if limit is None else limit
146
+ with self._connect() as db:
147
+ rows = db.execute(
148
+ """
149
+ SELECT call_id, command
150
+ FROM tool_calls
151
+ WHERE command IS NOT NULL AND command != ''
152
+ ORDER BY timestamp DESC, rowid DESC
153
+ LIMIT ?
154
+ """,
155
+ (row_limit,),
156
+ ).fetchall()
157
+ return [{"command_id": row[0], "command": row[1]} for row in rows]
158
+
159
+ def get_command(self, command_id: str) -> str:
160
+ with self._connect() as db:
161
+ row = db.execute(
162
+ """
163
+ SELECT command
164
+ FROM tool_calls
165
+ WHERE call_id = ? AND command IS NOT NULL AND command != ''
166
+ """,
167
+ (command_id,),
168
+ ).fetchone()
169
+ if row is None:
170
+ raise KeyError(f"unknown command: {command_id}")
171
+ return str(row[0])
172
+
173
+ def cleanup(self) -> None:
174
+ cutoff = datetime.now(UTC) - timedelta(days=self.retention_days)
175
+ with self._connect() as db:
176
+ db.execute("DELETE FROM tool_calls WHERE timestamp < ?", (cutoff.isoformat(),))
177
+
178
+ def _init_db(self) -> None:
179
+ with self._connect() as db:
180
+ db.execute(
181
+ """
182
+ CREATE TABLE IF NOT EXISTS tool_calls (
183
+ call_id TEXT PRIMARY KEY,
184
+ timestamp TEXT NOT NULL,
185
+ tool_name TEXT NOT NULL,
186
+ arguments_json TEXT NOT NULL,
187
+ result_json TEXT,
188
+ result_summary TEXT,
189
+ status TEXT NOT NULL,
190
+ error TEXT,
191
+ content TEXT,
192
+ command TEXT
193
+ )
194
+ """
195
+ )
196
+ db.execute(
197
+ "CREATE INDEX IF NOT EXISTS idx_tool_calls_timestamp ON tool_calls(timestamp)"
198
+ )
199
+
200
+ def _connect(self) -> sqlite3.Connection:
201
+ return sqlite3.connect(self.path)
202
+
203
+
204
+ def _default_history_path() -> Path:
205
+ configured = os.environ.get("EDITBUFFER_HISTORY_DB")
206
+ if configured:
207
+ return Path(configured)
208
+ return Path.home() / ".editbuffer" / "history.sqlite3"
209
+
210
+
211
+ def _redact(value: Any) -> Any:
212
+ if isinstance(value, dict):
213
+ redacted: dict[str, Any] = {}
214
+ for key, item in value.items():
215
+ if any(secret in str(key).lower() for secret in SECRET_KEYS):
216
+ redacted[key] = "[REDACTED]"
217
+ else:
218
+ redacted[key] = _redact(item)
219
+ return redacted
220
+ if isinstance(value, list):
221
+ return [_redact(item) for item in value]
222
+ return value
223
+
224
+
225
+ def _json_dump(value: Any) -> str | None:
226
+ if value is None:
227
+ return None
228
+ return json.dumps(value, ensure_ascii=False, sort_keys=True, default=str)
229
+
230
+
231
+ def _json_load(value: str | None) -> Any:
232
+ if value is None:
233
+ return None
234
+ return json.loads(value)
235
+
236
+
237
def _summary(value: Any) -> str | None:
    """Return at most the first 500 characters of the JSON form of ``value``.

    ``None`` yields ``None``. The text is cut at a fixed length, so a
    truncated summary is not guaranteed to be valid JSON.
    """
    serialised = None if value is None else _json_dump(value)
    return None if serialised is None else serialised[:500]
244
+
245
+
246
def _row(row: tuple[Any, ...]) -> dict[str, Any]:
    """Convert a ``tool_calls`` result row into a dictionary.

    The row must hold the ten selected columns in query order. The two JSON
    columns (arguments and result) are decoded; the rest are copied as-is.
    """
    names = (
        "call_id",
        "timestamp",
        "tool_name",
        "arguments",
        "result",
        "result_summary",
        "status",
        "error",
        "content",
        "command",
    )
    record: dict[str, Any] = {name: row[position] for position, name in enumerate(names)}
    for json_column in ("arguments", "result"):
        record[json_column] = _json_load(record[json_column])
    return record
@@ -13,14 +13,14 @@ from .errors import (
13
13
  StaleVersionError,
14
14
  TargetNotFoundError,
15
15
  )
16
- from .history import EditRecord
16
+ from .history import EditRecord, ToolHistoryStore
17
17
 
18
18
 
19
19
  class BufferRegistry:
20
- def __init__(self) -> None:
20
+ def __init__(self, history_store: ToolHistoryStore | None = None) -> None:
21
21
  self._buffers: dict[str, EditBuffer] = {}
22
- self._commands: list[dict[str, Any]] = []
23
- self._next_command_number = 1
22
+ self._history_store = history_store or ToolHistoryStore()
23
+ self._next_command_number = self._initial_command_number()
24
24
 
25
25
  def create(
26
26
  self,
@@ -67,7 +67,7 @@ class BufferRegistry:
67
67
  return self._state(buffer_id, buffer)
68
68
 
69
69
  def command_history(self) -> list[dict[str, Any]]:
70
- return list(self._commands)
70
+ return self._history_store.command_history()
71
71
 
72
72
  def current_version(self, buffer_id: str | None) -> int | None:
73
73
  if buffer_id is None:
@@ -81,10 +81,41 @@ class BufferRegistry:
81
81
  *,
82
82
  buffer_id: str | None = None,
83
83
  ) -> dict[str, Any]:
84
- for item in self._commands:
85
- if item["command_id"] == command_id:
86
- return self.create(item["command"], buffer_id=buffer_id)
87
- raise KeyError(f"unknown command: {command_id}")
84
+ return self.create(self._history_store.get_command(command_id), buffer_id=buffer_id)
85
+
86
+ def tool_history(self, limit: int | None = None) -> list[dict[str, Any]]:
87
+ return self._history_store.list_tool_calls(limit)
88
+
89
+ def record_tool_call(
90
+ self,
91
+ tool_name: str,
92
+ arguments: dict[str, Any] | None = None,
93
+ *,
94
+ result: Any = None,
95
+ status: str = "success",
96
+ error: str | None = None,
97
+ ) -> str:
98
+ return self._history_store.record_tool_call(
99
+ tool_name,
100
+ arguments,
101
+ result=result,
102
+ status=status,
103
+ error=error,
104
+ content=_content_from(tool_name, arguments or {}, result),
105
+ command=(arguments or {}).get("command"),
106
+ )
107
+
108
+ def select_tool_call(
109
+ self,
110
+ call_id: str,
111
+ *,
112
+ buffer_id: str | None = None,
113
+ ) -> dict[str, Any]:
114
+ item = self._history_store.get_tool_call(call_id)
115
+ content = item.get("command") or item.get("content")
116
+ if not isinstance(content, str) or not content:
117
+ raise KeyError(f"tool call has no selectable content: {call_id}")
118
+ return self.create(content, buffer_id=buffer_id)
88
119
 
89
120
  def _get(self, buffer_id: str) -> EditBuffer:
90
121
  try:
@@ -104,15 +135,23 @@ class BufferRegistry:
104
135
  def _remember_command(self, command: str) -> None:
105
136
  if not command.strip():
106
137
  return
107
- self._commands.insert(
108
- 0,
109
- {
110
- "command_id": f"cmd-{self._next_command_number}",
111
- "command": command,
112
- },
138
+ self._history_store.record_tool_call(
139
+ "command",
140
+ {"command": command},
141
+ call_id=f"cmd-{self._next_command_number}",
142
+ result={"command": command},
143
+ content=command,
144
+ command=command,
113
145
  )
114
146
  self._next_command_number += 1
115
- del self._commands[10:]
147
+
148
+ def _initial_command_number(self) -> int:
149
+ numbers: list[int] = []
150
+ for item in self._history_store.command_history(limit=1000):
151
+ command_id = item["command_id"]
152
+ if command_id.startswith("cmd-") and command_id[4:].isdigit():
153
+ numbers.append(int(command_id[4:]))
154
+ return max(numbers, default=0) + 1
116
155
 
117
156
 
118
157
  def _record(record: EditRecord) -> dict[str, Any]:
@@ -148,27 +187,23 @@ def create_server() -> Any:
148
187
  json_response=True,
149
188
  )
150
189
 
151
- @server.tool()
152
- def buffer_create(
153
- content: str = "",
154
- buffer_id: str | None = None,
155
- ) -> dict[str, Any]:
156
- """Create an in-memory pending output buffer."""
157
- return _tool_result(
158
- lambda: registry.create(content, buffer_id=buffer_id),
159
- registry,
160
- buffer_id=buffer_id,
161
- )
162
-
163
190
  @server.tool()
164
191
  def buffer_list() -> list[dict[str, Any]]:
165
192
  """List active pending output buffers."""
166
- return registry.list_buffers()
193
+ result = registry.list_buffers()
194
+ registry.record_tool_call("buffer_list", {}, result=result)
195
+ return result
167
196
 
168
197
  @server.tool()
169
198
  def buffer_view(buffer_id: str) -> dict[str, Any]:
170
199
  """View current content, version, snapshots, and commit state."""
171
- return _tool_result(lambda: registry.view(buffer_id), registry, buffer_id=buffer_id)
200
+ return _tool_result(
201
+ lambda: registry.view(buffer_id),
202
+ registry,
203
+ tool_name="buffer_view",
204
+ arguments={"buffer_id": buffer_id},
205
+ buffer_id=buffer_id,
206
+ )
172
207
 
173
208
  @server.tool()
174
209
  def buffer_edit(
@@ -185,6 +220,8 @@ def create_server() -> Any:
185
220
  return _tool_result(
186
221
  lambda: registry.edit(buffer_id, operation),
187
222
  registry,
223
+ tool_name="buffer_edit",
224
+ arguments={"buffer_id": buffer_id, "operation": operation},
188
225
  buffer_id=buffer_id,
189
226
  )
190
227
 
@@ -194,6 +231,8 @@ def create_server() -> Any:
194
231
  return _tool_result(
195
232
  lambda: registry.edit(buffer_id, {"op": "append", "text": text}),
196
233
  registry,
234
+ tool_name="buffer_append",
235
+ arguments={"buffer_id": buffer_id, "text": text},
197
236
  buffer_id=buffer_id,
198
237
  )
199
238
 
@@ -230,6 +269,8 @@ def create_server() -> Any:
230
269
  return _tool_result(
231
270
  lambda: registry.edit(buffer_id, {"op": "delete", "target": target}),
232
271
  registry,
272
+ tool_name="buffer_delete",
273
+ arguments={"buffer_id": buffer_id, "target": target},
233
274
  buffer_id=buffer_id,
234
275
  )
235
276
 
@@ -239,6 +280,8 @@ def create_server() -> Any:
239
280
  return _tool_result(
240
281
  lambda: registry.history(buffer_id),
241
282
  registry,
283
+ tool_name="buffer_history",
284
+ arguments={"buffer_id": buffer_id},
242
285
  buffer_id=buffer_id,
243
286
  )
244
287
 
@@ -248,28 +291,40 @@ def create_server() -> Any:
248
291
  return _tool_result(
249
292
  lambda: registry.rollback(buffer_id, version),
250
293
  registry,
294
+ tool_name="buffer_rollback",
295
+ arguments={"buffer_id": buffer_id, "version": version},
251
296
  buffer_id=buffer_id,
252
297
  )
253
298
 
254
299
  @server.tool()
255
300
  def buffer_commit(buffer_id: str) -> dict[str, Any]:
256
301
  """Commit final output, close the buffer, and remember it as a reusable command."""
257
- return _tool_result(lambda: registry.commit(buffer_id), registry, buffer_id=buffer_id)
302
+ return _tool_result(
303
+ lambda: registry.commit(buffer_id),
304
+ registry,
305
+ tool_name="buffer_commit",
306
+ arguments={"buffer_id": buffer_id},
307
+ buffer_id=buffer_id,
308
+ )
258
309
 
259
310
  @server.tool()
260
- def command_history() -> list[dict[str, Any]]:
261
- """Return up to 10 most recently committed commands, newest first."""
262
- return registry.command_history()
311
+ def tool_history(limit: int = 10) -> list[dict[str, Any]]:
312
+ """Return recent SQLite-backed tool calls, newest first."""
313
+ result = registry.tool_history(limit)
314
+ registry.record_tool_call("tool_history", {"limit": limit}, result=result)
315
+ return result
263
316
 
264
317
  @server.tool()
265
- def command_select(
266
- command_id: str,
318
+ def tool_select(
319
+ call_id: str,
267
320
  buffer_id: str | None = None,
268
321
  ) -> dict[str, Any]:
269
- """Create a new pending buffer from a previous command instead of regenerating it."""
322
+ """Create a pending buffer from selectable content in a previous tool call."""
270
323
  return _tool_result(
271
- lambda: registry.select_command(command_id, buffer_id=buffer_id),
324
+ lambda: registry.select_tool_call(call_id, buffer_id=buffer_id),
272
325
  registry,
326
+ tool_name="tool_select",
327
+ arguments={"call_id": call_id, "buffer_id": buffer_id},
273
328
  buffer_id=buffer_id,
274
329
  )
275
330
 
@@ -286,6 +341,8 @@ def _selection_tool(
286
341
  return _tool_result(
287
342
  lambda: registry.edit(buffer_id, {"op": op, "target": target, "text": text}),
288
343
  registry,
344
+ tool_name=f"buffer_{op}",
345
+ arguments={"buffer_id": buffer_id, "target": target, "text": text},
289
346
  buffer_id=buffer_id,
290
347
  )
291
348
 
@@ -294,18 +351,44 @@ def _tool_result(
294
351
  call: Any,
295
352
  registry: BufferRegistry,
296
353
  *,
354
+ tool_name: str | None = None,
355
+ arguments: dict[str, Any] | None = None,
297
356
  buffer_id: str | None = None,
298
357
  ) -> Any:
299
358
  try:
300
- return call()
359
+ result = call()
360
+ if tool_name is not None:
361
+ registry.record_tool_call(tool_name, arguments or {}, result=result)
362
+ return result
301
363
  except (EditBufferError, KeyError, ValueError) as error:
302
- return {
364
+ result = {
303
365
  "ok": False,
304
366
  "error": _structured_error(
305
367
  error,
306
368
  current_version=registry.current_version(buffer_id),
307
369
  ),
308
370
  }
371
+ if tool_name is not None:
372
+ registry.record_tool_call(
373
+ tool_name,
374
+ arguments or {},
375
+ result=result,
376
+ status="failed",
377
+ error=_message(error),
378
+ )
379
+ return result
380
+
381
+
382
+ def _content_from(tool_name: str, arguments: dict[str, Any], result: Any) -> str | None:
383
+ for key in ("command", "content", "text"):
384
+ value = arguments.get(key)
385
+ if isinstance(value, str) and value:
386
+ return value
387
+ if isinstance(result, dict):
388
+ value = result.get("content")
389
+ if isinstance(value, str) and tool_name in {"buffer_create", "buffer_view"}:
390
+ return value
391
+ return None
309
392
 
310
393
 
311
394
  def _structured_error(
@@ -344,6 +427,10 @@ def _error_type(error: Exception) -> str:
344
427
  return "unknown_buffer"
345
428
  if message.startswith("unknown command:"):
346
429
  return "unknown_command"
430
+ if message.startswith("unknown tool call:"):
431
+ return "unknown_tool_call"
432
+ if message.startswith("tool call has no selectable content:"):
433
+ return "unselectable_tool_call"
347
434
  return "not_found"
348
435
  if isinstance(error, ValueError):
349
436
  message = _message(error)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: editbuffer
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Selection-based mutable output buffer for LLM tools
5
5
  Author: averagedigital
6
6
  License-Expression: MIT
@@ -219,7 +219,6 @@ Claude Desktop and generic MCP client examples are in
219
219
 
220
220
  The server exposes:
221
221
 
222
- - `buffer_create`
223
222
  - `buffer_append`
224
223
  - `buffer_list`
225
224
  - `buffer_view`
@@ -231,8 +230,8 @@ The server exposes:
231
230
  - `buffer_history`
232
231
  - `buffer_rollback`
233
232
  - `buffer_commit`
234
- - `command_history`
235
- - `command_select`
233
+ - `tool_history`
234
+ - `tool_select`
236
235
 
237
236
  Buffers are in-memory and live for the MCP server process. The MCP layer calls
238
237
  the same core API and does not implement separate edit semantics.
@@ -249,10 +248,10 @@ Use the first-class selection tools for normal agent use:
249
248
 
250
249
  `buffer_edit` remains available for raw JSON operations.
251
250
 
252
- `buffer_commit` remembers non-empty committed output as a reusable command.
253
- `command_history` returns the last 10 commands, newest first. `command_select`
254
- creates a new pending buffer from a previous command so the model can reuse it
255
- instead of regenerating it.
251
+ MCP calls are recorded in SQLite-backed history. `tool_history` returns recent
252
+ calls, newest first. `tool_select` creates a pending buffer from selectable
253
+ content in a previous call so the model can repair it instead of regenerating
254
+ it.
256
255
 
257
256
  ## Examples
258
257
 
@@ -20,6 +20,10 @@ src/editbuffer.egg-info/entry_points.txt
20
20
  src/editbuffer.egg-info/requires.txt
21
21
  src/editbuffer.egg-info/top_level.txt
22
22
  tests/test_cli.py
23
+ tests/test_collect_harbor_results.py
23
24
  tests/test_editbuffer.py
24
25
  tests/test_mcp_server.py
25
- tests/test_mcp_stdio_eval.py
26
+ tests/test_mcp_stdio_eval.py
27
+ tests/test_metrics.py
28
+ tests/test_parse_agent_trajectories.py
29
+ tests/test_run_terminal_bench_ab.py