coding-agent-wrapper 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- caw/__init__.py +88 -0
- caw/agent.py +578 -0
- caw/auth/README.md +118 -0
- caw/auth/__init__.py +23 -0
- caw/auth/cli.py +68 -0
- caw/auth/collector.py +324 -0
- caw/auth/linker.py +174 -0
- caw/auth/manifest.py +77 -0
- caw/auth/providers.py +433 -0
- caw/auth/status.py +241 -0
- caw/cli.py +50 -0
- caw/display.py +223 -0
- caw/faststats.py +298 -0
- caw/mcp.py +602 -0
- caw/models.py +385 -0
- caw/pricing.json +15 -0
- caw/pricing.py +33 -0
- caw/provider.py +135 -0
- caw/providers/__init__.py +0 -0
- caw/providers/claude_code.py +648 -0
- caw/providers/codex.py +564 -0
- caw/py.typed +0 -0
- caw/storage.py +184 -0
- caw/toolkit.py +198 -0
- caw/viewer/__init__.py +149 -0
- caw/viewer/static/index.html +847 -0
- coding_agent_wrapper-0.1.0.dist-info/METADATA +213 -0
- coding_agent_wrapper-0.1.0.dist-info/RECORD +31 -0
- coding_agent_wrapper-0.1.0.dist-info/WHEEL +4 -0
- coding_agent_wrapper-0.1.0.dist-info/entry_points.txt +2 -0
- coding_agent_wrapper-0.1.0.dist-info/licenses/LICENSE +202 -0
caw/faststats.py
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
"""Fast extraction of frequently-needed statistics from trajectory files.
|
|
2
|
+
|
|
3
|
+
A full ``Trajectory.from_dict(json.loads(...))`` round-trip is slow on large
|
|
4
|
+
trajectories because the ``turns`` array can hold many MB of tool I/O that
|
|
5
|
+
the caller does not need. Most consumers (cost dashboards, spend limiters,
|
|
6
|
+
list views) only want a handful of header / footer fields:
|
|
7
|
+
|
|
8
|
+
cost_usd, model, created_at, completed_at, duration_ms, token totals.
|
|
9
|
+
|
|
10
|
+
``FastStats`` extracts these by reading only the head and tail of the file
|
|
11
|
+
(~8 KB total) and parsing the predictable indent=2 layout that
|
|
12
|
+
:class:`caw.storage.SessionStore` writes. The fast path is roughly 3x
|
|
13
|
+
quicker than ``json.loads`` on a directory of small trajectories and 25x+
|
|
14
|
+
faster on multi-MB files. When the fast path fails (non-CAW layout, hand
|
|
15
|
+
edited file, etc.) it falls back to a full JSON parse.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import json
|
|
21
|
+
import re
|
|
22
|
+
from dataclasses import asdict, dataclass
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from typing import Any, Iterable, Iterator, Optional
|
|
25
|
+
|
|
26
|
+
from caw.models import Trajectory
|
|
27
|
+
|
|
28
|
+
__all__ = ["FastStats"]
|
|
29
|
+
|
|
30
|
+
# Size, in bytes, of the window read from the start and from the end of each
# file. The CAW writer is expected to place the header fields (agent, model,
# session_id, created_at, completed_at, usage_limited) within roughly the
# first 300 bytes, and the closing usage / total_usage / duration_ms /
# metadata block within the last few hundred bytes. A 4 KB window on each
# side therefore leaves generous slack, even when a long ``system_prompt``
# value pushes the header fields further into the file.
_HEAD_BYTES = 4 * 1024
_TAIL_BYTES = 4 * 1024
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _str_field(blob: str, key: str) -> str:
    """Look up the first ``"key": "..."`` pair in *blob* and return its value.

    The empty string is returned when *blob* holds no string value for *key*.
    """
    pattern = rf'"{re.escape(key)}"\s*:\s*"((?:[^"\\]|\\.)*)"'
    found = re.search(pattern, blob)
    if found is None:
        return ""
    encoded = found.group(1)
    # Re-wrap the captured text in quotes and let the JSON decoder resolve
    # escape sequences (\", \\, \n, \uXXXX). If the decoder rejects the
    # text, hand back the undecoded capture instead.
    try:
        decoded = json.loads(f'"{encoded}"')
    except json.JSONDecodeError:
        return encoded
    return decoded
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _num_field(blob: str, key: str, *, default: float = 0.0) -> float:
    """Return the first JSON number value for ``"key": <num>`` in *blob*.

    *default* is returned when the key is absent, when its value does not
    start with a number-like character, or when the captured text is not a
    parseable number.
    """
    m = re.search(rf'"{re.escape(key)}"\s*:\s*([0-9eE.+-]+)', blob)
    if m is None:
        return default
    # The character class above is deliberately loose, so it can capture
    # text such as "-" or "1.2.3" from a damaged file. Treat those as
    # missing rather than letting ``float`` raise out of the fast path.
    try:
        return float(m.group(1))
    except ValueError:
        return default
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _bool_field(blob: str, key: str) -> bool:
    """Report whether the first ``"key": true|false`` pair in *blob* is ``true``.

    ``False`` is returned both for an explicit ``false`` and when no boolean
    value for *key* is found.
    """
    found = re.search(rf'"{re.escape(key)}"\s*:\s*(true|false)', blob)
    if found is None:
        return False
    return found.group(1) == "true"
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@dataclass
class FastStats:
    """Lightweight statistics for a CAW trajectory.

    The class is intentionally narrow: it exposes only the fields that
    consumers ask for repeatedly without paying for a full trajectory parse.
    For everything else (turns, tool calls, content blocks) load the file
    via :meth:`caw.agent.Session.load_trajectory` instead.

    All ``cost_usd`` / token values come from the trajectory's
    ``total_usage`` (recursive across subagents) when present, falling back
    to ``usage`` for older trajectories that did not record it separately.
    """

    # File the stats were read from, when known.
    path: Optional[Path] = None

    # Header fields (from the start of the file).
    agent: str = ""
    model: str = ""
    session_id: str = ""
    created_at: str = ""
    completed_at: str = ""
    usage_limited: bool = False

    # Footer fields (from the tail of the file).
    duration_ms: int = 0
    cost_usd: float = 0.0
    input_tokens: int = 0
    output_tokens: int = 0
    cache_read_tokens: int = 0
    cache_write_tokens: int = 0

    @property
    def total_tokens(self) -> int:
        """Return ``input_tokens + output_tokens`` (cache tokens excluded)."""
        return self.input_tokens + self.output_tokens

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable dict (``path`` is stringified)."""
        d = asdict(self)
        d["path"] = str(self.path) if self.path is not None else None
        return d

    # ------------------------------------------------------------------
    # Constructors
    # ------------------------------------------------------------------

    @classmethod
    def from_trajectory(cls, trajectory: Trajectory, *, path: str | Path | None = None) -> FastStats:
        """Build :class:`FastStats` from an in-memory :class:`Trajectory`.

        Usage figures are copied from ``trajectory.total_usage``; *path*, if
        given, is stored as a :class:`~pathlib.Path`.
        """
        usage = trajectory.total_usage
        return cls(
            path=Path(path) if path is not None else None,
            agent=trajectory.agent,
            model=trajectory.model,
            session_id=trajectory.session_id,
            created_at=trajectory.created_at,
            completed_at=trajectory.completed_at,
            usage_limited=trajectory.usage_limited,
            duration_ms=trajectory.duration_ms,
            cost_usd=usage.cost_usd,
            input_tokens=usage.input_tokens,
            output_tokens=usage.output_tokens,
            cache_read_tokens=usage.cache_read_tokens,
            cache_write_tokens=usage.cache_write_tokens,
        )

    @classmethod
    def from_path(cls, path: str | Path) -> Optional[FastStats]:
        """Read fast stats from *path*.

        Returns ``None`` if the file does not exist, is empty, or is not a
        recognizable trajectory file. Tries the head/tail fast path first
        and falls back to a full JSON parse on failure.
        """
        path = Path(path)
        try:
            size = path.stat().st_size
        except OSError:
            return None
        if size == 0:
            return None

        try:
            with open(path, "rb") as f:
                head_raw = f.read(min(_HEAD_BYTES, size))
                if size <= _HEAD_BYTES:
                    # The head window already holds the entire file.
                    tail_raw = head_raw
                elif size <= _HEAD_BYTES + _TAIL_BYTES:
                    # The windows would overlap: use the whole file as the
                    # tail. Join the raw bytes *before* decoding so that a
                    # multibyte character straddling the head boundary is
                    # not split into replacement characters.
                    tail_raw = head_raw + f.read()
                else:
                    f.seek(size - _TAIL_BYTES)
                    tail_raw = f.read(_TAIL_BYTES)
        except OSError:
            return None

        head = head_raw.decode("utf-8", errors="replace")
        tail = head if size <= _HEAD_BYTES else tail_raw.decode("utf-8", errors="replace")

        try:
            stats = cls._fast_extract(head, tail, path)
        except (ValueError, OverflowError):
            # A malformed or absurdly large number in the usage block is a
            # fast-path miss, not a fatal error; let the full parse decide.
            stats = None
        if stats is not None:
            return stats

        # Fallback: parse the full document. ``Trajectory.from_dict``
        # tolerates missing keys, so we explicitly require ``model`` to be
        # present and non-empty before treating the file as a trajectory.
        try:
            data = json.loads(path.read_bytes())
        except (OSError, ValueError):
            return None
        if not isinstance(data, dict) or not data.get("model"):
            return None
        try:
            traj = Trajectory.from_dict(data)
        except (ValueError, KeyError, TypeError):
            return None
        return cls.from_trajectory(traj, path=path)

    # ------------------------------------------------------------------
    # Directory iteration
    # ------------------------------------------------------------------

    @classmethod
    def iter_directory(
        cls,
        directory: str | Path,
        *,
        patterns: Iterable[str] = ("**/trajectory.json", "**/*.traj.json"),
        skip_parts: Iterable[str] = (),
    ) -> Iterator[FastStats]:
        """Yield :class:`FastStats` for every trajectory file under *directory*.

        ``patterns`` is a list of globs (relative to *directory*) to scan;
        the default catches both the canonical CAW layout
        (``sessions/<id>/trajectory.json``) and the ``.traj.json`` files
        produced by ad-hoc writers. Files whose path contains any directory
        component listed in ``skip_parts`` are excluded. Unreadable or
        malformed files are silently dropped. Nothing is yielded when
        *directory* is not an existing directory.
        """
        directory = Path(directory)
        if not directory.is_dir():
            return
        skip_set = set(skip_parts)
        # A file can match more than one pattern; report it only once.
        seen: set[Path] = set()
        for pattern in patterns:
            for file in directory.glob(pattern):
                if file in seen or not file.is_file():
                    continue
                seen.add(file)
                if skip_set:
                    try:
                        rel_parts = file.relative_to(directory).parts
                    except ValueError:
                        rel_parts = file.parts
                    if any(part in skip_set for part in rel_parts):
                        continue
                stats = cls.from_path(file)
                if stats is not None:
                    yield stats

    @classmethod
    def directory_total_cost(
        cls,
        directory: str | Path,
        **kwargs: Any,
    ) -> float:
        """Sum ``cost_usd`` across every trajectory under *directory*.

        Extra keyword arguments are forwarded to :meth:`iter_directory`.
        Returns ``0.0`` when no trajectory is found.
        """
        # Start from a float so an empty directory yields 0.0 rather than 0.
        return sum((s.cost_usd for s in cls.iter_directory(directory, **kwargs)), 0.0)

    # ------------------------------------------------------------------
    # Internals
    # ------------------------------------------------------------------

    @classmethod
    def _fast_extract(cls, head: str, tail: str, path: Path) -> Optional[FastStats]:
        """Pull fields from the raw head/tail text. Returns ``None`` on miss.

        *head* and *tail* are the decoded windows from the start and end of
        the file; *path* is stored on the result unchanged. A miss means
        that ``model`` was not found in *head* or that no top-level usage
        block was found in *tail*.
        """
        # ``model`` is a required CAW field. Its absence in the head means
        # this isn't a CAW trajectory and the caller should fall back.
        model = _str_field(head, "model")
        if not model:
            return None

        agent = _str_field(head, "agent")
        session_id = _str_field(head, "session_id")
        created_at = _str_field(head, "created_at")
        completed_at = _str_field(head, "completed_at")
        usage_limited = _bool_field(head, "usage_limited")

        # The trailing top-level usage block. With indent=2 the canonical
        # writer always emits a newline, exactly two spaces, then
        # ``"total_usage": {`` (or ``"usage": {``). Nested usage blocks
        # inside ``turns`` use indent 6+, so anchoring on the two-space form
        # is unambiguous. The indent is spelled out explicitly so its width
        # cannot be misread.
        top_level = "\n" + " " * 2
        anchor = tail.rfind(top_level + '"total_usage": {')
        if anchor == -1:
            anchor = tail.rfind(top_level + '"usage": {')
        if anchor == -1:
            return None

        trailing = tail[anchor:]
        # Find the first matching closing brace. ``UsageStats`` has no
        # nested objects so a simple ``find`` is correct.
        end = trailing.find("}")
        if end == -1:
            return None
        usage_blob = trailing[:end]

        cost_usd = _num_field(usage_blob, "cost_usd")
        input_tokens = int(_num_field(usage_blob, "input_tokens"))
        output_tokens = int(_num_field(usage_blob, "output_tokens"))
        cache_read_tokens = int(_num_field(usage_blob, "cache_read_tokens"))
        cache_write_tokens = int(_num_field(usage_blob, "cache_write_tokens"))

        # ``duration_ms`` lives at the very end of the file, after the usage
        # blocks. Search the slice past the usage block we just consumed to
        # avoid matching any per-turn ``duration_ms`` that might still be in
        # the tail buffer. The ``{2}`` pins the match to a top-level key.
        post_usage = trailing[end:]
        m = re.search(r'\n {2}"duration_ms"\s*:\s*([0-9]+)', post_usage)
        duration_ms = int(m.group(1)) if m else 0

        return cls(
            path=path,
            agent=agent,
            model=model,
            session_id=session_id,
            created_at=created_at,
            completed_at=completed_at,
            usage_limited=usage_limited,
            duration_ms=duration_ms,
            cost_usd=cost_usd,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cache_read_tokens=cache_read_tokens,
            cache_write_tokens=cache_write_tokens,
        )
|